LLVM: lib/Target/AMDGPU/AMDGPUSplitModule.cpp Source File
59#include <cassert>
60#include <cmath>
61#include <memory>
62#include <utility>
63#include <vector>
64
65#ifndef NDEBUG
67#endif
68
69#define DEBUG_TYPE "amdgpu-split-module"
70
71namespace llvm {
72namespace {
73
74static cl::opt<unsigned> MaxDepth(
75 "amdgpu-module-splitting-max-depth",
76 cl::desc(
77 "maximum search depth. 0 forces a greedy approach. "
78 "warning: the algorithm is up to O(2^N), where N is the max depth."),
79 cl::init(8), cl::Hidden);
80
81static cl::opt<float> LargeFnFactor(
82 "amdgpu-module-splitting-large-threshold", cl::init(2.0f), cl::Hidden,
83 cl::desc(
84 "when max depth is reached and we can no longer branch out, this "
85 "value determines if a function is worth merging into an already "
86 "existing partition to reduce code duplication. This is a factor "
87 "of the ideal partition size, e.g. 2.0 means we consider the "
88 "function for merging if its cost (including its callees) is 2x the "
89 "size of an ideal partition."));
90
91static cl::opt<float> LargeFnOverlapForMerge(
92 "amdgpu-module-splitting-merge-threshold", cl::init(0.7f), cl::Hidden,
93 cl::desc("when a function is considered for merging into a partition that "
94 "already contains some of its callees, do the merge if at least "
95 "n% of the code it can reach is already present inside the "
96 "partition; e.g. 0.7 means only merge >70%"));
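// Worked example of how these two knobs interact (hypothetical numbers, not
// taken from a real compile): with a module cost of 1000 and 4 partitions the
// ideal partition size is 250, so LargeFnFactor = 2.0 flags any cluster whose
// cost (excluding graph entry points) exceeds 500 as "large". A large cluster
// costing 600 is then merged into the most similar partition only if that
// partition already contains reachable code worth more than 0.7 * 600 = 420,
// per LargeFnOverlapForMerge; otherwise the cluster is not merged there.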
97
98static cl::opt<bool> NoExternalizeGlobals(
99 "amdgpu-module-splitting-no-externalize-globals", cl::Hidden,
100 cl::desc("disables externalization of global variable with local linkage; "
101 "may cause globals to be duplicated which increases binary size"));
102
103static cl::opt<bool> NoExternalizeOnAddrTaken(
104 "amdgpu-module-splitting-no-externalize-address-taken", cl::Hidden,
105 cl::desc(
106 "disables externalization of functions whose addresses are taken"));
107
108static cl::opt<std::string>
109 ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
110 cl::Hidden,
111 cl::desc("output file to write out the dotgraph "
112 "representation of the input module"));
113
114static cl::opt<std::string> PartitionSummariesOutput(
115 "amdgpu-module-splitting-print-partition-summaries", cl::Hidden,
116 cl::desc("output file to write out a summary of "
117 "the partitions created for each module"));
118
119#ifndef NDEBUG
120static cl::opt<bool>
121 UseLockFile("amdgpu-module-splitting-serial-execution", cl::Hidden,
122 cl::desc("use a lock file so only one process in the system "
123 "can run this pass at once. useful to avoid mangled "
124 "debug output in multithreaded environments."));
125
126static cl::opt<bool>
127 DebugProposalSearch("amdgpu-module-splitting-debug-proposal-search",
128 cl::Hidden,
129 cl::desc("print all proposals received and whether "
130 "they were rejected or accepted"));
131#endif
132
133struct SplitModuleTimer : NamedRegionTimer {
134 SplitModuleTimer(StringRef Name, StringRef Desc)
135 : NamedRegionTimer(Name, Desc, DEBUG_TYPE, "AMDGPU Module Splitting",
136 TimePassesIsEnabled) {}
137};
138
139
140
141
142
143using CostType = InstructionCost::CostType;
144using FunctionsCostMap = DenseMap<const Function *, CostType>;
145using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
146static constexpr unsigned InvalidPID = -1;
147
148
149
150
151static auto formatRatioOf(CostType Num, CostType Dem) {
152 CostType DemOr1 = Dem ? Dem : 1;
153 return format("%0.2f", (static_cast<double>(Num) / DemOr1) * 100);
154}
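// Example usage (illustrative values): formatRatioOf(50, 200) formats as
// "25.00"; a zero denominator is clamped to 1, so formatRatioOf(5, 0) yields
// "500.00" instead of dividing by zero.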
155
156
157
158
159
160
161
162
163
164static bool isNonCopyable(const Function &F) {
165 return F.hasExternalLinkage() || !F.isDefinitionExact() ||
166 AMDGPU::isEntryFunctionCC(F.getCallingConv());
167}
168
169
170static void externalize(GlobalValue &GV) {
171 if (GV.hasLocalLinkage()) {
172 GV.setLinkage(GlobalValue::ExternalLinkage);
173 GV.setVisibility(GlobalValue::HiddenVisibility);
174 }
175
176
177
178 if (!GV.hasName())
179 GV.setName("__llvmsplit_unnamed");
180}
181
182
183
184
185
186
187
188static CostType calculateFunctionCosts(GetTTIFn GetTTI, Module &M,
189 FunctionsCostMap &CostMap) {
190 SplitModuleTimer SMT("calculateFunctionCosts", "cost analysis");
191
192 LLVM_DEBUG(dbgs() << "[cost analysis] calculating function costs\n");
193 CostType ModuleCost = 0;
194 [[maybe_unused]] CostType KernelCost = 0;
195
196 for (auto &Fn : M) {
197 if (Fn.isDeclaration())
198 continue;
199
200 CostType FnCost = 0;
201 const auto &TTI = GetTTI(Fn);
202 for (const auto &BB : Fn) {
203 for (const auto &I : BB) {
204 auto Cost =
205 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
206 assert(Cost != InstructionCost::getMax());
207 // Assume a default expensive cost when the real cost is unknown.
208 CostType CostVal = Cost.isValid()
209 ? Cost.getValue()
210 : TargetTransformInfo::TCC_Expensive;
211 assert((FnCost + CostVal) >= FnCost && "Overflow!");
212 FnCost += CostVal;
213 }
214 }
215
217
218 CostMap[&Fn] = FnCost;
219 assert((ModuleCost + FnCost) >= ModuleCost && "Overflow!");
220 ModuleCost += FnCost;
221
222 if (AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
223 KernelCost += FnCost;
224 }
225
226 if (CostMap.empty())
227 return 0;
228
230 LLVM_DEBUG({
231 const CostType FnCost = ModuleCost - KernelCost;
232 dbgs() << " - total module cost is " << ModuleCost << ". kernels cost "
233 << "" << KernelCost << " ("
234 << format("%0.2f", (float(KernelCost) / ModuleCost) * 100)
235 << "% of the module), functions cost " << FnCost << " ("
236 << format("%0.2f", (float(FnCost) / ModuleCost) * 100)
237 << "% of the module)\n";
238 });
239
240 return ModuleCost;
241}
242
243
244static bool canBeIndirectlyCalled(const Function &F) {
245 if (F.isDeclaration() || AMDGPU::isEntryFunctionCC(F.getCallingConv()))
246 return false;
247 return !F.hasLocalLinkage() ||
248 F.hasAddressTaken(/*PutOffender=*/nullptr,
249 /*IgnoreCallbackUses=*/false,
250 /*IgnoreAssumeLikeCalls=*/true,
251 /*IgnoreLLVMUsed=*/true,
252 /*IgnoreARCAttachedCall=*/false,
253 /*IgnoreCastedDirectCall=*/true);
254}
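// Intuition, with hypothetical functions: a static helper whose address never
// escapes cannot be the target of an indirect call, so no IndirectCall edges
// are drawn to it; an externally visible function, or a local one whose
// address is taken, must conservatively be treated as reachable from every
// unknown indirect call site.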
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272class SplitGraph {
273public:
275
276 enum class EdgeKind : uint8_t {
277 // The destination function is called directly from the source node.
279 DirectCall,
281 // The destination function may be reached from the source node through
282 // an indirect call: the source contains an indirect call site and the
283 // destination can have its address taken.
288 IndirectCall,
289 };
290
291
292
293 struct Edge {
294 Edge(Node *Src, Node *Dst, EdgeKind Kind)
295 : Src(Src), Dst(Dst), Kind(Kind) {}
296
297 Node *Src;
298 Node *Dst;
299 EdgeKind Kind;
300 };
301
302 using EdgesVec = SmallVector<const Edge *, 0>;
303 using edges_iterator = EdgesVec::const_iterator;
304 using nodes_iterator = const Node *const *;
305
306 SplitGraph(const Module &M, const FunctionsCostMap &CostMap,
307 CostType ModuleCost)
308 : M(M), CostMap(CostMap), ModuleCost(ModuleCost) {}
309
310 void buildGraph(CallGraph &CG);
311
312#ifndef NDEBUG
313 bool verifyGraph() const;
314#endif
315
316 bool empty() const { return Nodes.empty(); }
317 iterator_range<nodes_iterator> nodes() const { return {Nodes.begin(), Nodes.end()}; }
318 const Node &getNode(unsigned ID) const { return *Nodes[ID]; }
319
320 unsigned getNumNodes() const { return Nodes.size(); }
321 BitVector createNodesBitVector() const { return BitVector(Nodes.size()); }
322
323 const Module &getModule() const { return M; }
324
325 CostType getModuleCost() const { return ModuleCost; }
326 CostType getCost(const Function &F) const { return CostMap.at(&F); }
327
328
329
330 CostType calculateCost(const BitVector &BV) const;
331
332private:
333
334
335 Node &getNode(DenseMap<const GlobalValue *, Node *> &Cache,
336 const GlobalValue &GV);
337
338
339 const Edge &createEdge(Node &Src, Node &Dst, EdgeKind EK);
340
341 const Module &M;
342 const FunctionsCostMap &CostMap;
343 CostType ModuleCost;
344
345
347
346 SmallVector<Node *> Nodes;
348 SpecificBumpPtrAllocator<Node> NodesPool;
349
350
351
352
353 static_assert(
354 std::is_trivially_destructible_v<Edge>,
355 "Edge must be trivially destructible to use the BumpPtrAllocator");
356 BumpPtrAllocator EdgesPool;
357};
358
359
360
361
362
363
364
365
366
367
368
369
370
371class SplitGraph::Node {
372 friend class SplitGraph;
373
374public:
375 Node(unsigned ID, const GlobalValue &GV, CostType IndividualCost,
376 bool IsNonCopyable)
377 : ID(ID), GV(GV), IndividualCost(IndividualCost),
378 IsNonCopyable(IsNonCopyable), IsEntryFnCC(false), IsGraphEntry(false) {
379 if (auto *Fn = dyn_cast<Function>(&GV))
380 IsEntryFnCC = AMDGPU::isEntryFunctionCC(Fn->getCallingConv());
381 }
382
383
384
385 unsigned getID() const { return ID; }
386
387 const Function &getFunction() const { return cast<Function>(GV); }
388
389
390
391 CostType getIndividualCost() const { return IndividualCost; }
392
393 bool isNonCopyable() const { return IsNonCopyable; }
394 bool isEntryFunctionCC() const { return IsEntryFnCC; }
395
396
397
398
399
400 bool isGraphEntryPoint() const { return IsGraphEntry; }
401
402 StringRef getName() const { return GV.getName(); }
403
404 bool hasAnyIncomingEdges() const { return IncomingEdges.size(); }
405 bool hasAnyIncomingEdgesOfKind(EdgeKind EK) const {
406 return any_of(IncomingEdges, [&](const auto *E) { return E->Kind == EK; });
407 }
408
409 bool hasAnyOutgoingEdges() const { return OutgoingEdges.size(); }
410 bool hasAnyOutgoingEdgesOfKind(EdgeKind EK) const {
411 return any_of(OutgoingEdges, [&](const auto *E) { return E->Kind == EK; });
412 }
413
414 iterator_range<edges_iterator> incoming_edges() const {
415 return IncomingEdges;
416 }
417
418 iterator_range<edges_iterator> outgoing_edges() const {
419 return OutgoingEdges;
420 }
421
422 bool shouldFollowIndirectCalls() const { return isEntryFunctionCC(); }
423
424
425
426
427
428
429 void visitAllDependencies(std::function<void(const Node &)> Visitor) const;
430
431
432
433
434
435
436
437
438 void getDependencies(BitVector &BV) const {
439 visitAllDependencies([&](const Node &N) { BV.set(N.getID()); });
440 }
441
442private:
443 void markAsGraphEntry() { IsGraphEntry = true; }
444
445 unsigned ID;
446 const GlobalValue &GV;
447 CostType IndividualCost;
448 bool IsNonCopyable : 1;
449 bool IsEntryFnCC : 1;
450 bool IsGraphEntry : 1;
451
452
453
454 EdgesVec IncomingEdges;
455 EdgesVec OutgoingEdges;
456};
457
458void SplitGraph::Node::visitAllDependencies(
459 std::function<void(const Node &)> Visitor) const {
460 const bool FollowIndirect = shouldFollowIndirectCalls();
461
462
463 DenseSet<const Node *> Seen;
464 SmallVector<const Node *, 8> WorkList({this});
465 while (!WorkList.empty()) {
466 const Node *CurN = WorkList.pop_back_val();
467 if (auto [It, Inserted] = Seen.insert(CurN); !Inserted)
468 continue;
469
470 Visitor(*CurN);
471
472 for (const Edge *E : CurN->outgoing_edges()) {
473 if (!FollowIndirect && E->Kind == EdgeKind::IndirectCall)
474 continue;
475 WorkList.push_back(E->Dst);
476 }
477 }
478}
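// Traversal sketch on a hypothetical graph: with edges K -(direct)-> A and
// A -(indirect)-> B, visiting the dependencies of an entry-function node K
// yields {K, A, B} because FollowIndirect is decided once from the root,
// while visiting A alone yields just {A} since non-entry roots skip
// IndirectCall edges.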
479
480
481
482
483
484
485
486
487static bool handleCalleesMD(const Instruction &I,
488 SetVector<Function *> &Callees) {
489 auto *MD = I.getMetadata(LLVMContext::MD_callees);
490 if (!MD)
491 return false;
492
493 for (const auto &Op : MD->operands()) {
494 Function *Callee = mdconst::extract_or_null<Function>(Op);
495 if (!Callee)
496 return false;
497 Callees.insert(Callee);
498 }
499
500 return true;
501}
502
503void SplitGraph::buildGraph(CallGraph &CG) {
504 SplitModuleTimer SMT("buildGraph", "graph construction");
505 LLVM_DEBUG(
506 dbgs()
507 << "[build graph] constructing graph representation of the input\n");
508
509
510
511
512
513
514
515 DenseMap<const GlobalValue *, Node *> Cache;
516 SmallVector<const Function *> FnsWithIndirectCalls, IndirectlyCallableFns;
517 for (const Function &Fn : M) {
518 if (Fn.isDeclaration())
519 continue;
520
521
522 SetVector<const Function *> DirectCallees;
523 bool CallsExternal = false;
524 for (auto &CGEntry : *CG[&Fn]) {
525 auto *CGNode = CGEntry.second;
526 if (auto *Callee = CGNode->getFunction()) {
527 if (!Callee->isDeclaration())
528 DirectCallees.insert(Callee);
529 } else if (CGNode == CG.getCallsExternalNode())
530 CallsExternal = true;
531 }
532
533
534
535 if (CallsExternal) {
536 LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for ";
537 Fn.printAsOperand(dbgs());
538 dbgs() << " - analyzing function\n");
539
540 SetVector<Function *> KnownCallees;
541 bool HasUnknownIndirectCall = false;
543
543 for (const auto &Inst : instructions(Fn)) {
544 const auto *CB = dyn_cast<CallBase>(&Inst);
545 if (!CB || CB->getCalledFunction())
546 continue;
547
548
549
550 if (CB->isInlineAsm()) {
551 LLVM_DEBUG(dbgs() << " found inline assembly\n");
552 continue;
553 }
554
555 if (handleCalleesMD(Inst, KnownCallees))
556 continue;
557
558
559 KnownCallees.clear();
560
561
562
563 HasUnknownIndirectCall = true;
564 break;
565 }
566
567 if (HasUnknownIndirectCall) {
568 LLVM_DEBUG(dbgs() << " indirect call found\n");
569 FnsWithIndirectCalls.push_back(&Fn);
570 } else if (!KnownCallees.empty())
571 DirectCallees.insert_range(KnownCallees);
572 }
573
574 Node &N = getNode(Cache, Fn);
575 for (const auto *Callee : DirectCallees)
576 createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
577
578 if (canBeIndirectlyCalled(Fn))
579 IndirectlyCallableFns.push_back(&Fn);
580 }
581
582
583 for (const Function *Fn : FnsWithIndirectCalls) {
584 for (const Function *Candidate : IndirectlyCallableFns) {
585 Node &Src = getNode(Cache, *Fn);
586 Node &Dst = getNode(Cache, *Candidate);
587 createEdge(Src, Dst, EdgeKind::IndirectCall);
588 }
589 }
590
591
592 SmallVector<Node *, 16> CandidateEntryPoints;
593 BitVector NodesReachableByKernels = createNodesBitVector();
595
594 for (Node *N : Nodes) {
596 if (N->isEntryFunctionCC()) {
597 N->markAsGraphEntry();
598 N->getDependencies(NodesReachableByKernels);
599 } else if (!N->hasAnyIncomingEdgesOfKind(EdgeKind::DirectCall))
600 CandidateEntryPoints.push_back(N);
601 }
602
603 for (Node *N : CandidateEntryPoints) {
604
605
606
607
608
609 if (!NodesReachableByKernels.test(N->getID()))
610 N->markAsGraphEntry();
611 }
612
613#ifndef NDEBUG
614 assert(verifyGraph());
615#endif
616}
617
618#ifndef NDEBUG
619bool SplitGraph::verifyGraph() const {
620 unsigned ExpectedID = 0;
621
622 DenseSet<const Node *> SeenNodes;
623 DenseSet<const Function *> SeenFunctionNodes;
624 for (const Node *N : Nodes) {
625 if (N->getID() != (ExpectedID++)) {
626 errs() << "Node IDs are incorrect!\n";
627 return false;
628 }
629
630 if (!SeenNodes.insert(N).second) {
631 errs() << "Node seen more than once!\n";
632 return false;
633 }
634
635 if (&getNode(N->getID()) != N) {
636 errs() << "getNode doesn't return the right node\n";
637 return false;
638 }
639
640 for (const Edge *E : N->IncomingEdges) {
641 if (!E->Src || !E->Dst || (E->Dst != N) ||
642 (find(E->Src->OutgoingEdges, E) == E->Src->OutgoingEdges.end())) {
643 errs() << "ill-formed incoming edges\n";
644 return false;
645 }
646 }
647
648 for (const Edge *E : N->OutgoingEdges) {
649 if (!E->Src || !E->Dst || (E->Src != N) ||
650 (find(E->Dst->IncomingEdges, E) == E->Dst->IncomingEdges.end())) {
651 errs() << "ill-formed outgoing edges\n";
652 return false;
653 }
654 }
655
656 const Function &Fn = N->getFunction();
657 if (AMDGPU::isEntryFunctionCC(Fn.getCallingConv())) {
658 if (N->hasAnyIncomingEdges()) {
659 errs() << "Kernels cannot have incoming edges\n";
660 return false;
661 }
662 }
663
664 if (Fn.isDeclaration()) {
665 errs() << "declarations shouldn't have nodes!\n";
666 return false;
667 }
668
669 auto [It, Inserted] = SeenFunctionNodes.insert(&Fn);
670 if (!Inserted) {
671 errs() << "one function has multiple nodes!\n";
672 return false;
673 }
674 }
675
676 if (ExpectedID != Nodes.size()) {
677 errs() << "Node IDs out of sync!\n";
678 return false;
679 }
680
681 if (createNodesBitVector().size() != getNumNodes()) {
682 errs() << "nodes bit vector doesn't have the right size!\n";
683 return false;
684 }
685
686
687 BitVector BV = createNodesBitVector();
688 for (const Node *N : Nodes) {
689 if (N->isGraphEntryPoint())
690 N->getDependencies(BV);
691 }
692
693
694 for (const auto &Fn : M) {
695 if (!Fn.isDeclaration()) {
696 if (!SeenFunctionNodes.contains(&Fn)) {
697 errs() << "Fn has no associated node in the graph!\n";
698 return false;
699 }
700 }
701 }
702
703 if (!BV.all()) {
704 errs() << "not all nodes are reachable through the graph's entry points!\n";
705 return false;
706 }
707
708 return true;
709}
710#endif
711
712CostType SplitGraph::calculateCost(const BitVector &BV) const {
713 CostType Cost = 0;
714 for (unsigned NodeID : BV.set_bits())
715 Cost += getNode(NodeID).getIndividualCost();
716 return Cost;
717}
718
719SplitGraph::Node &
720SplitGraph::getNode(DenseMap<const GlobalValue *, Node *> &Cache,
721 const GlobalValue &GV) {
722 auto &N = Cache[&GV];
723 if (N)
724 return *N;
725
726 CostType Cost = 0;
727 bool NonCopyable = false;
728 if (const Function *Fn = dyn_cast<Function>(&GV)) {
729 NonCopyable = isNonCopyable(*Fn);
730 Cost = CostMap.at(Fn);
731 }
732 N = new (NodesPool.Allocate()) Node(Nodes.size(), GV, Cost, NonCopyable);
733 Nodes.push_back(N);
735 return *N;
736}
737
738const SplitGraph::Edge &SplitGraph::createEdge(Node &Src, Node &Dst,
739 EdgeKind EK) {
740 const Edge *E = new (EdgesPool.Allocate<Edge>(1)) Edge(&Src, &Dst, EK);
741 Src.OutgoingEdges.push_back(E);
742 Dst.IncomingEdges.push_back(E);
743 return *E;
744}
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759class SplitProposal {
760public:
761 SplitProposal(const SplitGraph &SG, unsigned MaxPartitions) : SG(&SG) {
762 Partitions.resize(MaxPartitions, {0, SG.createNodesBitVector()});
763 }
764
765 void setName(StringRef NewName) { Name = NewName; }
766 StringRef getName() const { return Name; }
767
768 const BitVector &operator[](unsigned PID) const {
769 return Partitions[PID].second;
770 }
771
772 void add(unsigned PID, const BitVector &BV) {
773 Partitions[PID].second |= BV;
774 updateScore(PID);
775 }
776
777 void print(raw_ostream &OS) const;
779
780
781
782 unsigned findCheapestPartition() const;
783
784
785 void calculateScores();
786
787#ifndef NDEBUG
788 void verifyCompleteness() const;
789#endif
790
791
792
793
794
795
796
797
798
799
800 double getCodeSizeScore() const { return CodeSizeScore; }
801
802
803
804
805
806
807
808
809
810
811
812
813
814 double getBottleneckScore() const { return BottleneckScore; }
815
816private:
817 void updateScore(unsigned PID) {
819 for (auto &[PCost, Nodes] : Partitions) {
820 TotalCost -= PCost;
821 PCost = SG->calculateCost(Nodes);
822 TotalCost += PCost;
823 }
824 }
825
826
827 double CodeSizeScore = 0.0;
828
829 double BottleneckScore = 0.0;
830
831 CostType TotalCost = 0;
832
833 const SplitGraph *SG = nullptr;
834 std::string Name;
835
836 std::vector<std::pair<CostType, BitVector>> Partitions;
837};
838
839void SplitProposal::print(raw_ostream &OS) const {
841
842 OS << "[proposal] " << Name << ", total cost:" << TotalCost
843 << ", code size score:" << format("%0.3f", CodeSizeScore)
844 << ", bottleneck score:" << format("%0.3f", BottleneckScore) << '\n';
845 for (const auto &[PID, Part] : enumerate(Partitions)) {
846 const auto &[Cost, NodeIDs] = Part;
847 OS << " - P" << PID << " nodes:" << NodeIDs.count() << " cost: " << Cost
848 << '|' << formatRatioOf(Cost, SG->getModuleCost()) << "%\n";
849 }
850}
851
852unsigned SplitProposal::findCheapestPartition() const {
853 assert(!Partitions.empty());
854 CostType CurCost = std::numeric_limits<CostType>::max();
855 unsigned CurPID = InvalidPID;
856 for (const auto &[Idx, Part] : enumerate(Partitions)) {
857 if (Part.first <= CurCost) {
858 CurPID = Idx;
859 CurCost = Part.first;
860 }
861 }
862 assert(CurPID != InvalidPID);
863 return CurPID;
864}
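// Example (made-up partition costs): for {120, 80, 80} the "<=" comparison
// keeps updating on ties, so the last of the cheapest partitions (P2, cost
// 80) is returned rather than P1.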
865
866void SplitProposal::calculateScores() {
867 if (Partitions.empty())
868 return;
869
871 CostType LargestPCost = 0;
872 for (auto &[PCost, Nodes] : Partitions) {
873 if (PCost > LargestPCost)
874 LargestPCost = PCost;
875 }
876
877 CostType ModuleCost = SG->getModuleCost();
878 CodeSizeScore = double(TotalCost) / ModuleCost;
879 assert(CodeSizeScore >= 0.0);
880
881 BottleneckScore = double(LargestPCost) / ModuleCost;
882
883 CodeSizeScore = std::ceil(CodeSizeScore * 100.0) / 100.0;
884 BottleneckScore = std::ceil(BottleneckScore * 100.0) / 100.0;
885}
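// Worked example (made-up numbers): splitting a module of cost 100 into
// partitions costing {40, 35, 45} gives TotalCost = 120 because shared
// callees are duplicated, hence CodeSizeScore = 120 / 100 = 1.20 and
// BottleneckScore = 45 / 100 = 0.45; the std::ceil step only rounds both up
// to two decimals.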
886
887#ifndef NDEBUG
888void SplitProposal::verifyCompleteness() const {
889 if (Partitions.empty())
890 return;
891
892 BitVector Result = Partitions[0].second;
893 for (const auto &P : drop_begin(Partitions))
894 Result |= P.second;
895 assert(Result.all() && "some nodes are missing from this proposal!");
896}
897#endif
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914class RecursiveSearchSplitting {
915public:
916 using SubmitProposalFn = function_ref<void(SplitProposal)>;
917
918 RecursiveSearchSplitting(const SplitGraph &SG, unsigned NumParts,
919 SubmitProposalFn SubmitProposal);
920
921 void run();
922
923private:
924 struct WorkListEntry {
925 WorkListEntry(const BitVector &BV) : Cluster(BV) {}
926
927 unsigned NumNonEntryNodes = 0;
928 CostType TotalCost = 0;
929 CostType CostExcludingGraphEntryPoints = 0;
930 BitVector Cluster;
931 };
932
933
934
935
936 void setupWorkList();
937
938
939
940
941
942
943
944
945
946
947 void pickPartition(unsigned Depth, unsigned Idx, SplitProposal SP);
948
949
950
951
952
953
954 std::pair<unsigned, CostType>
955 findMostSimilarPartition(const WorkListEntry &Entry, const SplitProposal &SP);
956
957 const SplitGraph &SG;
958 unsigned NumParts;
959 SubmitProposalFn SubmitProposal;
960
961
962
963 CostType LargeClusterThreshold = 0;
964 unsigned NumProposalsSubmitted = 0;
965 SmallVector<WorkListEntry> WorkList;
966};
967
968RecursiveSearchSplitting::RecursiveSearchSplitting(
969 const SplitGraph &SG, unsigned NumParts, SubmitProposalFn SubmitProposal)
970 : SG(SG), NumParts(NumParts), SubmitProposal(SubmitProposal) {
971
972
973
974 if (MaxDepth > 16)
975 report_fatal_error("[amdgpu-split-module] search depth of " +
976 Twine(MaxDepth) + " is too high!");
977 LargeClusterThreshold =
978 (LargeFnFactor != 0.0)
979 ? CostType(((SG.getModuleCost() / NumParts) * LargeFnFactor))
980 : std::numeric_limits<CostType>::max();
981 LLVM_DEBUG(dbgs() << "[recursive search] large cluster threshold set at "
982 << LargeClusterThreshold << "\n");
983}
984
985void RecursiveSearchSplitting::run() {
986 {
987 SplitModuleTimer SMT("recursive_search_prepare", "preparing worklist");
988 setupWorkList();
989 }
990
991 {
992 SplitModuleTimer SMT("recursive_search_pick", "partitioning");
993 SplitProposal SP(SG, NumParts);
994 pickPartition(0, 0, SP);
995 }
996}
997
998void RecursiveSearchSplitting::setupWorkList() {
999
1000
1001
1002
1003
1004
1005
1006 EquivalenceClasses<unsigned> NodeEC;
1007 for (const SplitGraph::Node *N : SG.nodes()) {
1008 if (!N->isGraphEntryPoint())
1009 continue;
1010
1011 NodeEC.insert(N->getID());
1012 N->visitAllDependencies([&](const SplitGraph::Node &Dep) {
1013 if (&Dep != N && Dep.isNonCopyable())
1014 NodeEC.unionSets(N->getID(), Dep.getID());
1015 });
1016 }
1017
1018 for (const auto &Node : NodeEC) {
1019 if (!Node->isLeader())
1020 continue;
1021
1022 BitVector Cluster = SG.createNodesBitVector();
1023 for (unsigned M : NodeEC.members(*Node)) {
1024 const SplitGraph::Node &N = SG.getNode(M);
1025 if (N.isGraphEntryPoint())
1026 N.getDependencies(Cluster);
1027 }
1028 WorkList.emplace_back(std::move(Cluster));
1029 }
1030
1031
1032 for (WorkListEntry &Entry : WorkList) {
1033 for (unsigned NodeID : Entry.Cluster.set_bits()) {
1034 const SplitGraph::Node &N = SG.getNode(NodeID);
1035 const CostType Cost = N.getIndividualCost();
1036
1037 Entry.TotalCost += Cost;
1038 if (!N.isGraphEntryPoint()) {
1039 Entry.CostExcludingGraphEntryPoints += Cost;
1040 ++Entry.NumNonEntryNodes;
1041 }
1042 }
1043 }
1044
1045 stable_sort(WorkList, [](const WorkListEntry &A, const WorkListEntry &B) {
1046 if (A.TotalCost != B.TotalCost)
1047 return A.TotalCost > B.TotalCost;
1048
1049 if (A.CostExcludingGraphEntryPoints != B.CostExcludingGraphEntryPoints)
1050 return A.CostExcludingGraphEntryPoints > B.CostExcludingGraphEntryPoints;
1051
1052 if (A.NumNonEntryNodes != B.NumNonEntryNodes)
1053 return A.NumNonEntryNodes > B.NumNonEntryNodes;
1054
1055 return A.Cluster.count() > B.Cluster.count();
1056 });
1057
1058 LLVM_DEBUG({
1059 dbgs() << "[recursive search] worklist:\n";
1060 for (const auto &[Idx, Entry] : enumerate(WorkList)) {
1061 dbgs() << " - [" << Idx << "]: ";
1062 for (unsigned NodeID : Entry.Cluster.set_bits())
1063 dbgs() << NodeID << " ";
1064 dbgs() << "(total_cost:" << Entry.TotalCost
1065 << ", cost_excl_entries:" << Entry.CostExcludingGraphEntryPoints
1066 << ")\n";
1067 }
1068 });
1069}
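// Example of the clustering above (hypothetical module): if kernels K1 and K2
// both reach a non-copyable helper H, the union-find places K1, K2 and H in
// one equivalence class, so a single worklist entry carries the union of both
// kernels' dependency sets, while a kernel that only reaches copyable code
// gets an entry of its own. The stable_sort then makes the most expensive
// clusters get assigned to partitions first.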
1070
1071void RecursiveSearchSplitting::pickPartition(unsigned Depth, unsigned Idx,
1072 SplitProposal SP) {
1073 while (Idx < WorkList.size()) {
1074
1075
1076 const WorkListEntry &Entry = WorkList[Idx];
1077 const BitVector &Cluster = Entry.Cluster;
1078
1079
1080
1081 const unsigned CheapestPID = SP.findCheapestPartition();
1082 assert(CheapestPID != InvalidPID);
1083
1084
1085
1086 const auto [MostSimilarPID, SimilarDepsCost] =
1087 findMostSimilarPartition(Entry, SP);
1088
1089
1090
1091 unsigned SinglePIDToTry = InvalidPID;
1092 if (MostSimilarPID == InvalidPID)
1093 SinglePIDToTry = CheapestPID;
1094 else if (MostSimilarPID == CheapestPID)
1095 SinglePIDToTry = CheapestPID;
1096 else if (Depth >= MaxDepth) {
1097
1098
1099 if (Entry.CostExcludingGraphEntryPoints > LargeClusterThreshold) {
1100
1101 assert(SimilarDepsCost && Entry.CostExcludingGraphEntryPoints);
1102 const double Ratio = static_cast<double>(SimilarDepsCost) /
1103 Entry.CostExcludingGraphEntryPoints;
1104 assert(Ratio >= 0.0 && Ratio <= 1.0);
1105 if (Ratio > LargeFnOverlapForMerge) {
1106
1107
1108
1110 SinglePIDToTry = MostSimilarPID;
1111 }
1112 } else
1113 SinglePIDToTry = CheapestPID;
1114 }
1115
1116
1117
1118
1119
1120 if (SinglePIDToTry != InvalidPID) {
1121 LLVM_DEBUG(dbgs() << Idx << "=P" << SinglePIDToTry << ' ');
1122
1123 SP.add(SinglePIDToTry, Cluster);
1124 ++Idx;
1125 continue;
1126 }
1127
1128 assert(MostSimilarPID != InvalidPID);
1129
1130
1131
1132
1134
1135
1136 {
1137 SplitProposal BranchSP = SP;
1138 LLVM_DEBUG(dbgs()
1139 << " [lb] " << Idx << "=P" << CheapestPID << "? ");
1140 BranchSP.add(CheapestPID, Cluster);
1141 pickPartition(Depth + 1, Idx + 1, BranchSP);
1142 }
1143
1144
1145 {
1146 SplitProposal BranchSP = SP;
1147 LLVM_DEBUG(dbgs()
1148 << " [ms] " << Idx << "=P" << MostSimilarPID << "? ");
1149 BranchSP.add(MostSimilarPID, Cluster);
1150 pickPartition(Depth + 1, Idx + 1, BranchSP);
1151 }
1152
1153 return;
1154 }
1155
1156
1157
1158 assert(Idx == WorkList.size());
1159 assert(NumProposalsSubmitted <= (2u << MaxDepth) &&
1160 "Search got out of bounds?");
1161 SP.setName("recursive_search (depth=" + std::to_string(Depth) + ") #" +
1162 std::to_string(NumProposalsSubmitted++));
1164 SubmitProposal(SP);
1165}
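// Search-size sketch: every worklist entry is either assigned directly or
// explored along two branches (cheapest partition vs. most similar
// partition), so the number of submitted proposals can grow on the order of
// 2^MaxDepth, which is the blow-up the -amdgpu-module-splitting-max-depth
// warning refers to; the assert above sanity-checks it against the looser
// bound (2u << MaxDepth).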
1166
1167std::pair<unsigned, CostType>
1168RecursiveSearchSplitting::findMostSimilarPartition(const WorkListEntry &Entry,
1169 const SplitProposal &SP) {
1170 if (!Entry.NumNonEntryNodes)
1171 return {InvalidPID, 0};
1172
1173
1174
1175
1176 unsigned ChosenPID = InvalidPID;
1177 CostType ChosenCost = 0;
1178 for (unsigned PID = 0; PID < NumParts; ++PID) {
1179 BitVector BV = SP[PID];
1180 BV &= Entry.Cluster;
1181
1182 if (BV.none())
1183 continue;
1184
1185 const CostType Cost = SG.calculateCost(BV);
1186
1187 if (ChosenPID == InvalidPID || ChosenCost < Cost ||
1188 (ChosenCost == Cost && PID > ChosenPID)) {
1189 ChosenPID = PID;
1190 ChosenCost = Cost;
1191 }
1192 }
1193
1194 return {ChosenPID, ChosenCost};
1195}
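// Example with made-up overlaps: if the entry's cluster shares nodes costing
// 30 with P0 and nodes costing 55 with P2, P2 is returned together with 55,
// and pickPartition divides that 55 by the cluster's cost excluding graph
// entry points to decide whether merging avoids enough duplication.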
1196
1197
1198
1199
1200
1201const SplitGraph::Node *mapEdgeToDst(const SplitGraph::Edge *E) {
1202 return E->Dst;
1203}
1204
1205using SplitGraphEdgeDstIterator =
1206 mapped_iterator<SplitGraph::edges_iterator, decltype(&mapEdgeToDst)>;
1207
1208}
1209
1210template <> struct GraphTraits<SplitGraph> {
1211 using NodeRef = const SplitGraph::Node *;
1212 using nodes_iterator = SplitGraph::nodes_iterator;
1213 using ChildIteratorType = SplitGraphEdgeDstIterator;
1214
1215 using EdgeRef = const SplitGraph::Edge *;
1216 using ChildEdgeIteratorType = SplitGraph::edges_iterator;
1217
1218 static NodeRef getEntryNode(NodeRef N) { return N; }
1219
1220 static ChildIteratorType child_begin(NodeRef Ref) {
1221 return {Ref->outgoing_edges().begin(), mapEdgeToDst};
1222 }
1223 static ChildIteratorType child_end(NodeRef Ref) {
1224 return {Ref->outgoing_edges().end(), mapEdgeToDst};
1225 }
1226
1227 static nodes_iterator nodes_begin(const SplitGraph &G) {
1228 return G.nodes().begin();
1229 }
1230 static nodes_iterator nodes_end(const SplitGraph &G) {
1231 return G.nodes().end();
1232 }
1233};
1234
1237
1235template <> struct DOTGraphTraits<SplitGraph> : public DefaultDOTGraphTraits {
1236 DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
1237
1238 static std::string getGraphName(const SplitGraph &SG) {
1239 return SG.getModule().getName().str();
1240 }
1241
1242 std::string getNodeLabel(const SplitGraph::Node *N, const SplitGraph &SG) {
1243 return N->getName().str();
1244 }
1245
1246 static std::string getNodeDescription(const SplitGraph::Node *N,
1247 const SplitGraph &SG) {
1248 std::string Result;
1249 if (N->isEntryFunctionCC())
1250 Result += "entry-fn-cc ";
1251 if (N->isNonCopyable())
1252 Result += "non-copyable ";
1253 Result += "cost:" + std::to_string(N->getIndividualCost());
1254 return Result;
1255 }
1256
1257 static std::string getNodeAttributes(const SplitGraph::Node *N,
1258 const SplitGraph &SG) {
1259 return N->hasAnyIncomingEdges() ? "" : "color=\"red\"";
1260 }
1261
1262 static std::string getEdgeAttributes(const SplitGraph::Node *N,
1263 SplitGraphEdgeDstIterator EI,
1264 const SplitGraph &SG) {
1265
1266 switch ((*EI.getCurrent())->Kind) {
1267 case SplitGraph::EdgeKind::DirectCall:
1268 return "";
1269 case SplitGraph::EdgeKind::IndirectCall:
1270 return "style=\"dashed\"";
1271 }
1272 llvm_unreachable("Unknown SplitGraph::EdgeKind enum!");
1273 }
1274};
1275
1276
1277
1278
1279
1280namespace {
1281
1282
1283
1284
1285static bool needsConservativeImport(const GlobalValue *GV) {
1286 if (const auto *Var = dyn_cast<GlobalVariable>(GV))
1287 return Var->hasLocalLinkage();
1288 return isa<GlobalAlias>(GV);
1289}
1290
1291
1292
1293static void printPartitionSummary(raw_ostream &OS, unsigned N, const Module &M,
1294 unsigned PartCost, unsigned ModuleCost) {
1295 OS << "*** Partition P" << N << " ***\n";
1296
1297 for (const auto &Fn : M) {
1298 if (!Fn.isDeclaration())
1299 OS << " - [function] " << Fn.getName() << "\n";
1300 }
1301
1302 for (const auto &GV : M.globals()) {
1303 if (GV.hasInitializer())
1304 OS << " - [global] " << GV.getName() << "\n";
1305 }
1306
1307 OS << "Partition contains " << formatRatioOf(PartCost, ModuleCost)
1308 << "% of the source\n";
1309}
1310
1311static void evaluateProposal(SplitProposal &Best, SplitProposal New) {
1312 SplitModuleTimer SMT("proposal_evaluation", "proposal ranking algorithm");
1313
1314 LLVM_DEBUG({
1315 New.verifyCompleteness();
1316 if (DebugProposalSearch)
1317 New.print(dbgs());
1318 });
1319
1320 const double CurBScore = Best.getBottleneckScore();
1321 const double CurCSScore = Best.getCodeSizeScore();
1322 const double NewBScore = New.getBottleneckScore();
1323 const double NewCSScore = New.getCodeSizeScore();
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335 bool IsBest = false;
1336 if (NewBScore < CurBScore)
1337 IsBest = true;
1338 else if (NewBScore == CurBScore)
1339 IsBest = (NewCSScore < CurCSScore);
1340
1341 if (IsBest)
1342 Best = std::move(New);
1343
1344 LLVM_DEBUG(if (DebugProposalSearch) {
1345 if (IsBest)
1346 dbgs() << "[search] new best proposal!\n";
1347 else
1348 dbgs() << "[search] discarding - not profitable\n";
1349 });
1350}
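// Ranking example (hypothetical scores): a proposal with BottleneckScore 0.45
// and CodeSizeScore 1.30 beats one with 0.50 / 1.10, because the bottleneck
// (the largest partition, which gates how well codegen parallelizes) is
// compared first and code size only breaks ties.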
1351
1352
1353static std::unique_ptr<Module> cloneAll(const Module &M) {
1354 ValueToValueMapTy VMap;
1355 return CloneModule(M, VMap, [&](const GlobalValue *GV) { return true; });
1356}
1357
1358
1359static void writeDOTGraph(const SplitGraph &SG) {
1360 if (ModuleDotCfgOutput.empty())
1361 return;
1362
1363 std::error_code EC;
1364 raw_fd_ostream OS(ModuleDotCfgOutput, EC);
1365 if (EC) {
1366 errs() << "[" DEBUG_TYPE "]: cannot open '" << ModuleDotCfgOutput
1367 << "' - DOTGraph will not be printed\n";
1368 }
1369 WriteGraph(OS, SG, false,
1370 SG.getModule().getName());
1371}
1372
1373static void splitAMDGPUModule(
1374 GetTTIFn GetTTI, Module &M, unsigned NumParts,
1375 function_ref<void(std::unique_ptr MPart)> ModuleCallback) {
1376 CallGraph CG(M);
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395 if (!NoExternalizeOnAddrTaken) {
1396 for (auto &Fn : M) {
1397
1398
1399 if (Fn.hasLocalLinkage() && Fn.hasAddressTaken()) {
1400 LLVM_DEBUG(dbgs() << "[externalize] "; Fn.printAsOperand(dbgs());
1401 dbgs() << " because its address is taken\n");
1402 externalize(Fn);
1403 }
1404 }
1405 }
1406
1407
1408
1409 if (!NoExternalizeGlobals) {
1410 for (auto &GV : M.globals()) {
1411 if (GV.hasLocalLinkage())
1412 LLVM_DEBUG(dbgs() << "[externalize] GV " << GV.getName() << '\n');
1413 externalize(GV);
1414 }
1415 }
1416
1417
1418
1419 FunctionsCostMap FnCosts;
1420 const CostType ModuleCost = calculateFunctionCosts(GetTTI, M, FnCosts);
1421
1422
1423
1424 SplitGraph SG(M, FnCosts, ModuleCost);
1425 SG.buildGraph(CG);
1426
1427 if (SG.empty()) {
1428 LLVM_DEBUG(
1429 dbgs()
1430 << "[!] no nodes in graph, input is empty - no splitting possible\n");
1431 ModuleCallback(cloneAll(M));
1432 return;
1433 }
1434
1435 LLVM_DEBUG({
1436 dbgs() << "[graph] nodes:\n";
1437 for (const SplitGraph::Node *N : SG.nodes()) {
1438 dbgs() << " - [" << N->getID() << "]: " << N->getName() << " "
1439 << (N->isGraphEntryPoint() ? "(entry)" : "") << " "
1440 << (N->isNonCopyable() ? "(noncopyable)" : "") << "\n";
1441 }
1442 });
1443
1444 writeDOTGraph(SG);
1445
1446 LLVM_DEBUG(dbgs() << "[search] testing splitting strategies\n");
1447
1448 std::optional Proposal;
1449 const auto EvaluateProposal = [&](SplitProposal SP) {
1450 SP.calculateScores();
1451 if (!Proposal)
1452 Proposal = std::move(SP);
1453 else
1454 evaluateProposal(*Proposal, std::move(SP));
1455 };
1456
1457
1458
1459 RecursiveSearchSplitting(SG, NumParts, EvaluateProposal).run();
1460 LLVM_DEBUG(if (Proposal) dbgs() << "[search done] selected proposal: "
1461 << Proposal->getName() << "\n";);
1462
1463 if (!Proposal) {
1464 LLVM_DEBUG(dbgs() << "[!] no proposal made, no splitting possible!\n");
1465 ModuleCallback(cloneAll(M));
1466 return;
1467 }
1468
1470
1471 std::optional<raw_fd_ostream> SummariesOS;
1472 if (!PartitionSummariesOutput.empty()) {
1473 std::error_code EC;
1474 SummariesOS.emplace(PartitionSummariesOutput, EC);
1475 if (EC)
1476 errs() << "[" DEBUG_TYPE "]: cannot open '" << PartitionSummariesOutput
1477 << "' - Partition summaries will not be printed\n";
1478 }
1479
1480
1481
1482 bool ImportAllGVs = true;
1483
1484 for (unsigned PID = 0; PID < NumParts; ++PID) {
1485 SplitModuleTimer SMT2("modules_creation",
1486 "creating modules for each partition");
1487 LLVM_DEBUG(dbgs() << "[split] creating new modules\n");
1488
1489 DenseSet<const Function *> FnsInPart;
1490 for (unsigned NodeID : (*Proposal)[PID].set_bits())
1491 FnsInPart.insert(&SG.getNode(NodeID).getFunction());
1492
1493
1494 if (FnsInPart.empty()) {
1495 LLVM_DEBUG(dbgs() << "[split] P" << PID
1496 << " is empty, not creating module\n");
1497 continue;
1498 }
1499
1500 ValueToValueMapTy VMap;
1501 CostType PartCost = 0;
1502 std::unique_ptr MPart(
1503 CloneModule(M, VMap, [&](const GlobalValue *GV) {
1504
1505 if (const auto *Fn = dyn_cast<Function>(GV)) {
1506 if (FnsInPart.contains(Fn)) {
1507 PartCost += SG.getCost(*Fn);
1508 return true;
1509 }
1510 return false;
1511 }
1512
1513
1514 return ImportAllGVs || needsConservativeImport(GV);
1515 }));
1516
1517 ImportAllGVs = false;
1518
1519
1520
1521
1522
1523 for (auto &GV : make_early_inc_range(MPart->global_values())) {
1524 if (needsConservativeImport(&GV) && GV.use_empty())
1525 GV.eraseFromParent();
1526 }
1527
1528 if (SummariesOS)
1529 printPartitionSummary(*SummariesOS, PID, *MPart, PartCost, ModuleCost);
1530
1531 LLVM_DEBUG(
1532 printPartitionSummary(dbgs(), PID, *MPart, PartCost, ModuleCost));
1533
1534 ModuleCallback(std::move(MPart));
1535 }
1536}
1537}
1538
1539PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
1540 ModuleAnalysisManager &MAM) {
1541 SplitModuleTimer SMT(
1542 "total", "total pass runtime (incl. potentially waiting for lockfile)");
1543
1544 FunctionAnalysisManager &FAM =
1545 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1546 const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & {
1547 return FAM.getResult<TargetIRAnalysis>(F);
1548 };
1549
1550 bool Done = false;
1551#ifndef NDEBUG
1552 if (UseLockFile) {
1557 << "'\n");
1558
1559 while (true) {
1561 bool Owned;
1562 if (Error Err = Lock.tryLock().moveInto(Owned)) {
1565 dbgs() << "[amdgpu-split-module] unable to acquire lockfile, debug "
1566 "output may be mangled by other processes\n");
1567 } else if (!Owned) {
1570 break;
1572 continue;
1576 << "[amdgpu-split-module] unable to acquire lockfile, debug "
1577 "output may be mangled by other processes\n");
1579 break;
1580 }
1581 }
1582
1583 splitAMDGPUModule(TTIGetter, M, N, ModuleCallback);
1584 Done = true;
1585 break;
1586 }
1587 }
1588#endif
1589
1590 if (!Done)
1591 splitAMDGPUModule(TTIGetter, M, N, ModuleCallback);
1592
1593
1594
1595 return PreservedAnalyses::none();
1596}
1597}