LLVM: lib/Transforms/IPO/MemProfContextDisambiguation.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
50#include
51#include
52#include <unordered_map>
53#include
54using namespace llvm;
56
57#define DEBUG_TYPE "memprof-context-disambiguation"
58
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
74 "ThinLTO backend");
75STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
88STATISTIC(RemovedEdgesWithMismatchedCallees,
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
96STATISTIC(FoundProfiledCalleeNonUniquelyCount,
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes, "Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters, "Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters, "Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds, "Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted, "Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded, "Number of fixup edges added");
113STATISTIC(NumFixedContexts, "Number of contexts with fixed edges");
114
118 cl::desc("Specify the path prefix of the MemProf dot files."));
119
122 cl::desc("Export graph to dot files."));
123
124
127 cl::desc("Iteratively apply merging on a node to catch new callers"));
128
129
135
137 "memprof-dot-scope", cl::desc("Scope of graph to export to dot"),
142 "Export only nodes with contexts feeding given "
143 "-memprof-dot-alloc-id"),
145 "Export only nodes with given -memprof-dot-context-id")));
146
149 cl::desc("Id of alloc to export if -memprof-dot-scope=alloc "
150 "or to highlight if -memprof-dot-scope=all"));
151
154 cl::desc("Id of context to export if -memprof-dot-scope=context or to "
155 "highlight otherwise"));
156
159 cl::desc("Dump CallingContextGraph to stdout after each stage."));
160
163 cl::desc("Perform verification checks on CallingContextGraph."));
164
167 cl::desc("Perform frequent verification checks on nodes."));
168
170 "memprof-import-summary",
171 cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
173
177 cl::desc("Max depth to recursively search for missing "
178 "frames through tail calls."));
179
180
183 cl::desc("Allow cloning of callsites involved in recursive cycles"));
184
187 cl::desc("Allow cloning of contexts through recursive cycles"));
188
189
190
191
194 cl::desc("Merge clones before assigning functions"));
195
196
197
198
199
200
203 cl::desc("Allow cloning of contexts having recursive cycles"));
204
205
206
209 cl::desc("Minimum absolute count for promoted target to be inlinable"));
210
211namespace llvm {
213 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
215
216
217
220 cl::desc("Linking with hot/cold operator new interfaces"));
221
223 "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
225 "Require target function definition when promoting indirect calls"));
226
229
232 cl::desc("Number of largest cold contexts to consider important"));
233
236 cl::desc("Enables edge fixup for important contexts"));
237
238}
239
240namespace {
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256template <typename DerivedCCG, typename FuncTy, typename CallTy>
257class CallsiteContextGraph {
258public:
259 CallsiteContextGraph() = default;
260 CallsiteContextGraph(const CallsiteContextGraph &) = default;
261 CallsiteContextGraph(CallsiteContextGraph &&) = default;
262
263
264 bool process();
265
266
267
268 void identifyClones();
269
270
271
272
273
274
275 bool assignFunctions();
276
277 void dump() const;
279 void printTotalSizes(raw_ostream &OS) const;
280
282 const CallsiteContextGraph &CCG) {
283 CCG.print(OS);
284 return OS;
285 }
286
287 friend struct GraphTraits<
288 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
289 friend struct DOTGraphTraits<
290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
291
292 void exportToDot(std::string Label) const;
293
294
295 struct FuncInfo final
296 : public std::pair<FuncTy *, unsigned > {
297 using Base = std::pair<FuncTy *, unsigned>;
298 FuncInfo(const Base &B) : Base(B) {}
299 FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
300 explicit operator bool() const { return this->first != nullptr; }
301 FuncTy *func() const { return this->first; }
302 unsigned cloneNo() const { return this->second; }
303 };
304
305
306 struct CallInfo final : public std::pair<CallTy, unsigned > {
307 using Base = std::pair<CallTy, unsigned>;
308 CallInfo(const Base &B) : Base(B) {}
309 CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
310 : Base(Call, CloneNo) {}
311 explicit operator bool() const { return (bool)this->first; }
312 CallTy call() const { return this->first; }
313 unsigned cloneNo() const { return this->second; }
314 void setCloneNo(unsigned N) { this->second = N; }
315 void print(raw_ostream &OS) const {
316 if (!operator bool()) {
318 OS << "null Call";
319 return;
320 }
321 call()->print(OS);
322 OS << "\t(clone " << cloneNo() << ")";
323 }
324 void dump() const {
326 dbgs() << "\n";
327 }
328 friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
330 return OS;
331 }
332 };
333
334 struct ContextEdge;
335
336
337 struct ContextNode {
338
339 unsigned NodeId = 0;
340
341
342
343
344 bool IsAllocation;
345
346
347
348 bool Recursive = false;
349
350
351
352 uint8_t AllocTypes = 0;
353
354
355
356 CallInfo Call;
357
358
359
360
361
363
364
365
366
367
368
369
370
371 uint64_t OrigStackOrAllocId = 0;
372
373
374
375 std::vector<std::shared_ptr> CalleeEdges;
376
377
378
379 std::vector<std::shared_ptr> CallerEdges;
380
381
382
383 bool useCallerEdgesForContextInfo() const {
384
385
386
387
388 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
390
391
392
393
394
396 }
397
398
399
400 DenseSet<uint32_t> getContextIds() const {
401 unsigned Count = 0;
402
403
404
405
406 for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
407 Count += Edge->getContextIds().size();
408 DenseSet<uint32_t> ContextIds;
411 CalleeEdges, useCallerEdgesForContextInfo()
412 ? CallerEdges
413 : std::vector<std::shared_ptr>());
414 for (const auto &Edge : Edges)
416 return ContextIds;
417 }
418
419
420
421 uint8_t computeAllocType() const {
422 uint8_t BothTypes =
423 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
424 uint8_t AllocType = (uint8_t)AllocationType::None;
426 CalleeEdges, useCallerEdgesForContextInfo()
427 ? CallerEdges
428 : std::vector<std::shared_ptr>());
429 for (const auto &Edge : Edges) {
431
434 }
436 }
437
438
439
440 bool emptyContextIds() const {
442 CalleeEdges, useCallerEdgesForContextInfo()
443 ? CallerEdges
444 : std::vector<std::shared_ptr>());
445 for (const auto &Edge : Edges) {
446 if (->getContextIds().empty())
447 return false;
448 }
449 return true;
450 }
451
452
453 std::vector<ContextNode *> Clones;
454
455
456 ContextNode *CloneOf = nullptr;
457
458 ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
459
460 ContextNode(bool IsAllocation, CallInfo C)
461 : IsAllocation(IsAllocation), Call(C) {}
462
463 void addClone(ContextNode *Clone) {
464 if (CloneOf) {
465 CloneOf->Clones.push_back(Clone);
466 Clone->CloneOf = CloneOf;
467 } else {
468 Clones.push_back(Clone);
469 assert(!Clone->CloneOf);
470 Clone->CloneOf = this;
471 }
472 }
473
474 ContextNode *getOrigNode() {
475 if (!CloneOf)
476 return this;
477 return CloneOf;
478 }
479
481 unsigned int ContextId);
482
483 ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
484 ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
485 void eraseCalleeEdge(const ContextEdge *Edge);
486 void eraseCallerEdge(const ContextEdge *Edge);
487
488 void setCall(CallInfo C) { Call = C; }
489
490 bool hasCall() const { return (bool)Call.call(); }
491
492 void printCall(raw_ostream &OS) const { Call.print(OS); }
493
494
495
496 bool isRemoved() const {
497
498
499
500
502 (AllocTypes == (uint8_t)AllocationType::None) ==
503 emptyContextIds());
504 return AllocTypes == (uint8_t)AllocationType::None;
505 }
506
507 void dump() const;
508 void print(raw_ostream &OS) const;
509
510 friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
511 Node.print(OS);
512 return OS;
513 }
514 };
515
516
517
518 struct ContextEdge {
519 ContextNode *Callee;
520 ContextNode *Caller;
521
522
523
524 uint8_t AllocTypes = 0;
525
526
527
528
529
530
531
532 bool IsBackedge = false;
533
534
535 DenseSet<uint32_t> ContextIds;
536
537 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
538 DenseSet<uint32_t> ContextIds)
539 : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
540 ContextIds(std::move(ContextIds)) {}
541
542 DenseSet<uint32_t> &getContextIds() { return ContextIds; }
543
544
545
546 inline void clear() {
547 ContextIds.clear();
548 AllocTypes = (uint8_t)AllocationType::None;
549 Caller = nullptr;
550 Callee = nullptr;
551 }
552
553
554
555
556 inline bool isRemoved() const {
557 if (Callee || Caller)
558 return false;
559
560
561
562 assert(AllocTypes == (uint8_t)AllocationType::None);
563 assert(ContextIds.empty());
564 return true;
565 }
566
567 void dump() const;
568 void print(raw_ostream &OS) const;
569
570 friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
571 Edge.print(OS);
572 return OS;
573 }
574 };
575
576
577
578 void removeNoneTypeCalleeEdges(ContextNode *Node);
579 void removeNoneTypeCallerEdges(ContextNode *Node);
580 void
581 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
582 DenseSet<const ContextNode *> &Visited);
583
584protected:
585
586
587 template <class NodeT, class IteratorT>
588 std::vector<uint64_t>
589 getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
590
591
592
593 ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
594
595
596 template <class NodeT, class IteratorT>
597 void addStackNodesForMIB(
598 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
601 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
602
603
604
605
606 void updateStackNodes();
607
608
609
610
611
612
613
614
615 void fixupImportantContexts();
616
617
618
619 void handleCallsitesWithMultipleTargets();
620
621
622 void markBackedges();
623
624
625
626 void mergeClones();
627
628
629
630
631
632 bool partitionCallsByCallee(
634 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
635
636
637
638 MapVector<FuncTy *, std::vector> FuncToCallsWithMetadata;
639
640
641 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
642
643
644
645 DenseSet<uint32_t> DotAllocContextIds;
646
647private:
648 using EdgeIter = typename std::vector<std::shared_ptr>::iterator;
649
650
651
652
653 struct CallContextInfo {
654
655 CallTy Call;
656
657 std::vector<uint64_t> StackIds;
658
659 const FuncTy *Func;
660
661
662 DenseSet<uint32_t> ContextIds;
663 };
664
665
666
667
668
669
670
671 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,
672 bool CalleeIter = true);
673
674
675
676
677
678
679
680 void assignStackNodesPostOrder(
681 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
682 DenseMap<uint64_t, std::vector> &StackIdToMatchingCalls,
683 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
684 const DenseSet<uint32_t> &ImportantContextIds);
685
686
687
688
689 DenseSet<uint32_t> duplicateContextIds(
690 const DenseSet<uint32_t> &StackSequenceContextIds,
691 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
692
693
694 void propagateDuplicateContextIds(
695 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
696
697
698
699
700 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
701 bool TowardsCallee,
702 DenseSet<uint32_t> RemainingContextIds);
703
704
705
706
707 uint64_t getStackId(uint64_t IdOrIndex) const {
708 return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
709 }
710
711
712
713
714
715
716
717 bool
718 calleesMatch(CallTy Call, EdgeIter &EI,
719 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
720
721
722
723 const FuncTy *getCalleeFunc(CallTy Call) {
724 return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);
725 }
726
727
728
729
730 bool calleeMatchesFunc(
731 CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
732 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
733 return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
734 Call, Func, CallerFunc, FoundCalleeChain);
735 }
736
737
738 bool sameCallee(CallTy Call1, CallTy Call2) {
739 return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
740 }
741
742
743
744 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
745 return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
747 }
748
749
750 uint64_t getLastStackId(CallTy Call) {
751 return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
752 }
753
754
756 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
757 static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
758 }
759
760
762 return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);
763 }
764
765
766
767 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
768 static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
769 }
770
771
772
773
774 FuncInfo cloneFunctionForCallsite(
775 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
776 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {
777 return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
778 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
779 }
780
781
782
783 std::string getLabel(const FuncTy *Func, const CallTy Call,
784 unsigned CloneNo) const {
785 return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
786 }
787
788
789 ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,
790 CallInfo C = CallInfo()) {
791 NodeOwner.push_back(std::make_unique(IsAllocation, C));
792 auto *NewNode = NodeOwner.back().get();
793 if (F)
794 NodeToCallingFunc[NewNode] = F;
795 NewNode->NodeId = NodeOwner.size();
796 return NewNode;
797 }
798
799
800 ContextNode *getNodeForInst(const CallInfo &C);
801 ContextNode *getNodeForAlloc(const CallInfo &C);
802 ContextNode *getNodeForStackId(uint64_t StackId);
803
804
805
806 uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds) const;
807
808
809
810
811 uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
812 const DenseSet<uint32_t> &Node2Ids) const;
813
814
815
816 uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
817 const DenseSet<uint32_t> &Node2Ids) const;
818
819
820
821
822
823 ContextNode *
824 moveEdgeToNewCalleeClone(const std::shared_ptr &Edge,
825 DenseSet<uint32_t> ContextIdsToMove = {});
826
827
828
829
830
831 void moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge,
832 ContextNode *NewCallee,
833 bool NewClone = false,
834 DenseSet<uint32_t> ContextIdsToMove = {});
835
836
837
838
839
840
841 void moveCalleeEdgeToNewCaller(const std::shared_ptr &Edge,
842 ContextNode *NewCaller);
843
844
845 void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
846 DenseSet<const ContextNode *> &CurrentStack);
847
848
849 void
850 mergeClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
851 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
852
853 void mergeNodeCalleeClones(
854 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
855 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
856
857
858 void findOtherCallersToShareMerge(
859 ContextNode *Node, std::vector<std::shared_ptr> &CalleeEdges,
860 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
861 DenseSet<ContextNode *> &OtherCallersToShareMerge);
862
863
864
865
866
867 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
868 const DenseSet<uint32_t> &AllocContextIds);
869
870
871 DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
872
873
874
875
876
877 DenseMap<uint32_t, std::vector> ContextIdToContextSizeInfos;
878
879
880
881
882
883 DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
884
885
886
887 struct ImportantContextInfo {
888
889 std::vector<uint64_t> StackIds;
890
891
892 unsigned MaxLength = 0;
893
894
895
896 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
897 };
898
899
900 DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;
901
902
903
904
905 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *Node,
906
907
908
909 const DenseSet<uint32_t> &NodeContextIds,
910 const DenseSet<uint32_t> &ImportantContextIds) {
913 return;
914 }
915 DenseSet<uint32_t> Ids =
918 return;
919 auto Size = StackIds.size();
920 for (auto Id : Ids) {
921 auto &Entry = ImportantContextIdInfo[Id];
922 Entry.StackIdsToNode[StackIds] = Node;
923
926 }
927 }
928
929
930 MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
931 MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
932
933
934 std::vector<std::unique_ptr> NodeOwner;
935
936
937 void check() const;
938
939
940 unsigned int LastContextId = 0;
941};
942
943template <typename DerivedCCG, typename FuncTy, typename CallTy>
944using ContextNode =
945 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
946template <typename DerivedCCG, typename FuncTy, typename CallTy>
947using ContextEdge =
948 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
949template <typename DerivedCCG, typename FuncTy, typename CallTy>
950using FuncInfo =
951 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
952template <typename DerivedCCG, typename FuncTy, typename CallTy>
953using CallInfo =
954 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
955
956
957class ModuleCallsiteContextGraph
958 : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
959 Instruction *> {
960public:
961 ModuleCallsiteContextGraph(
963 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
964
965private:
966 friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
968
969 uint64_t getStackId(uint64_t IdOrIndex) const;
970 const Function *getCalleeFunc(Instruction *Call);
971 bool calleeMatchesFunc(
972 Instruction *Call, const Function *Func, const Function *CallerFunc,
973 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
974 bool sameCallee(Instruction *Call1, Instruction *Call2);
975 bool findProfiledCalleeThroughTailCalls(
976 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
977 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
978 bool &FoundMultipleCalleeChains);
979 uint64_t getLastStackId(Instruction *Call);
980 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
983 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
984 CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
986 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
987 DenseMap<CallInfo, CallInfo> &CallMap,
988 std::vector &CallsWithMetadataInFunc,
989 unsigned CloneNo);
990 std::string getLabel(const Function *Func, const Instruction *Call,
991 unsigned CloneNo) const;
992
994 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
995};
996
997
998
999
1000struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
1001 IndexCall() : PointerUnion() {}
1002 IndexCall(std::nullptr_t) : IndexCall() {}
1003 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1004 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1005 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1006
1007 IndexCall *operator->() { return this; }
1008
1009 void print(raw_ostream &OS) const {
1010 PointerUnion<CallsiteInfo *, AllocInfo *> Base = *this;
1012 OS << *AI;
1013 } else {
1016 OS << *CI;
1017 }
1018 }
1019};
1020}
1021
1022namespace llvm {
1031}
1032
1033namespace {
1034
1035class IndexCallsiteContextGraph
1036 : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1037 IndexCall> {
1038public:
1039 IndexCallsiteContextGraph(
1040 ModuleSummaryIndex &Index,
1041 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1042 isPrevailing);
1043
1044 ~IndexCallsiteContextGraph() {
1045
1046
1047
1048
1049 for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
1051 for (auto &Callsite : I.second)
1052 FS->addCallsite(*Callsite.second);
1053 }
1054 }
1055
1056private:
1057 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1058 IndexCall>;
1059
1060 uint64_t getStackId(uint64_t IdOrIndex) const;
1061 const FunctionSummary *getCalleeFunc(IndexCall &Call);
1062 bool calleeMatchesFunc(
1063 IndexCall &Call, const FunctionSummary *Func,
1064 const FunctionSummary *CallerFunc,
1065 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1066 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1067 bool findProfiledCalleeThroughTailCalls(
1068 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
1069 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1070 bool &FoundMultipleCalleeChains);
1071 uint64_t getLastStackId(IndexCall &Call);
1072 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
1075 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1076 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1077 IndexCall>::FuncInfo
1078 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
1079 DenseMap<CallInfo, CallInfo> &CallMap,
1080 std::vector &CallsWithMetadataInFunc,
1081 unsigned CloneNo);
1082 std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
1083 unsigned CloneNo) const;
1084
1085
1086
1087 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1088
1089 const ModuleSummaryIndex &Index;
1090 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1091 isPrevailing;
1092
1093
1094
1095
1096
1097 std::unordered_map<FunctionSummary *,
1098 std::map<ValueInfo, std::unique_ptr>>
1099 FunctionCalleesToSynthesizedCallsiteInfos;
1100};
1101}
1102
1103template <>
1107template <>
1110 : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1111template <>
1113 : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1114
1115namespace {
1116
1117
1118
1119
1120
1121
1124 if (AllocTypes ==
1127 else
1129}
1130
1131
1132
1133
1134template <typename DerivedCCG, typename FuncTy, typename CallTy>
1135bool allocTypesMatch(
1136 const std::vector<uint8_t> &InAllocTypes,
1137 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1138 &Edges) {
1139
1140
1141 assert(InAllocTypes.size() == Edges.size());
1142 return std::equal(
1143 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1145 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1146
1147
1148
1149 if (l == (uint8_t)AllocationType::None ||
1150 r->AllocTypes == (uint8_t)AllocationType::None)
1151 return true;
1152 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1153 });
1154}
1155
1156
1157
1158
1159
1160
1161template <typename DerivedCCG, typename FuncTy, typename CallTy>
1162bool allocTypesMatchClone(
1163 const std::vector<uint8_t> &InAllocTypes,
1164 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1165 const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
1167
1168
1169 assert(InAllocTypes.size() == Node->CalleeEdges.size());
1170
1172 EdgeCalleeMap;
1173 for (const auto &E : Clone->CalleeEdges) {
1175 EdgeCalleeMap[E->Callee] = E->AllocTypes;
1176 }
1177
1178
1179 for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
1180 auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
1181
1182 if (Iter == EdgeCalleeMap.end())
1183 continue;
1184
1185
1186
1189 continue;
1190 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
1191 return false;
1192 }
1193 return true;
1194}
1195
1196}
1197
1198template <typename DerivedCCG, typename FuncTy, typename CallTy>
1199typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1200CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1201 const CallInfo &C) {
1202 ContextNode *Node = getNodeForAlloc(C);
1204 return Node;
1205
1206 return NonAllocationCallToContextNodeMap.lookup(C);
1207}
1208
1209template <typename DerivedCCG, typename FuncTy, typename CallTy>
1210typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1211CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1212 const CallInfo &C) {
1213 return AllocationCallToContextNodeMap.lookup(C);
1214}
1215
1216template <typename DerivedCCG, typename FuncTy, typename CallTy>
1217typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1218CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1220 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1221 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1222 return StackEntryNode->second;
1223 return nullptr;
1224}
1225
1226template <typename DerivedCCG, typename FuncTy, typename CallTy>
1227void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1229 unsigned int ContextId) {
1230 for (auto &Edge : CallerEdges) {
1231 if (Edge->Caller == Caller) {
1233 Edge->getContextIds().insert(ContextId);
1234 return;
1235 }
1236 }
1237 std::shared_ptr Edge = std::make_shared(
1238 this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
1239 CallerEdges.push_back(Edge);
1240 Caller->CalleeEdges.push_back(Edge);
1241}
1242
1243template <typename DerivedCCG, typename FuncTy, typename CallTy>
1244void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1245 ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {
1246 assert(!EI || (*EI)->get() == Edge);
1248
1249
1250
1251
1254
1255
1256
1257
1258 Edge->clear();
1259
1260#ifndef NDEBUG
1261 auto CalleeCallerCount = Callee->CallerEdges.size();
1262 auto CallerCalleeCount = Caller->CalleeEdges.size();
1263#endif
1264 if (!EI) {
1267 } else if (CalleeIter) {
1269 *EI = Caller->CalleeEdges.erase(*EI);
1270 } else {
1272 *EI = Callee->CallerEdges.erase(*EI);
1273 }
1274 assert(Callee->CallerEdges.size() < CalleeCallerCount);
1275 assert(Caller->CalleeEdges.size() < CallerCalleeCount);
1276}
1277
1278template <typename DerivedCCG, typename FuncTy, typename CallTy>
1279void CallsiteContextGraph<
1280 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1281 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
1282 auto Edge = *EI;
1283 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1285 removeEdgeFromGraph(Edge.get(), &EI, true);
1286 } else
1287 ++EI;
1288 }
1289}
1290
1291template <typename DerivedCCG, typename FuncTy, typename CallTy>
1292void CallsiteContextGraph<
1293 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1294 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
1295 auto Edge = *EI;
1296 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1298 Edge->Caller->eraseCalleeEdge(Edge.get());
1299 EI = Node->CallerEdges.erase(EI);
1300 } else
1301 ++EI;
1302 }
1303}
1304
1305template <typename DerivedCCG, typename FuncTy, typename CallTy>
1306typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1307CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1308 findEdgeFromCallee(const ContextNode *Callee) {
1309 for (const auto &Edge : CalleeEdges)
1310 if (Edge->Callee == Callee)
1311 return Edge.get();
1312 return nullptr;
1313}
1314
1315template <typename DerivedCCG, typename FuncTy, typename CallTy>
1316typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1317CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1318 findEdgeFromCaller(const ContextNode *Caller) {
1319 for (const auto &Edge : CallerEdges)
1320 if (Edge->Caller == Caller)
1321 return Edge.get();
1322 return nullptr;
1323}
1324
1325template <typename DerivedCCG, typename FuncTy, typename CallTy>
1326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1327 eraseCalleeEdge(const ContextEdge *Edge) {
1329 CalleeEdges, [Edge](const std::shared_ptr &CalleeEdge) {
1330 return CalleeEdge.get() == Edge;
1331 });
1332 assert(EI != CalleeEdges.end());
1333 CalleeEdges.erase(EI);
1334}
1335
1336template <typename DerivedCCG, typename FuncTy, typename CallTy>
1337void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1338 eraseCallerEdge(const ContextEdge *Edge) {
1340 CallerEdges, [Edge](const std::shared_ptr &CallerEdge) {
1341 return CallerEdge.get() == Edge;
1342 });
1343 assert(EI != CallerEdges.end());
1344 CallerEdges.erase(EI);
1345}
1346
1347template <typename DerivedCCG, typename FuncTy, typename CallTy>
1348uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1349 DenseSet<uint32_t> &ContextIds) const {
1350 uint8_t BothTypes =
1351 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1352 uint8_t AllocType = (uint8_t)AllocationType::None;
1353 for (auto Id : ContextIds) {
1354 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1355
1358 }
1360}
1361
1362template <typename DerivedCCG, typename FuncTy, typename CallTy>
1363uint8_t
1364CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1365 const DenseSet<uint32_t> &Node1Ids,
1366 const DenseSet<uint32_t> &Node2Ids) const {
1367 uint8_t BothTypes =
1368 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1369 uint8_t AllocType = (uint8_t)AllocationType::None;
1370 for (auto Id : Node1Ids) {
1371 if (!Node2Ids.count(Id))
1372 continue;
1373 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1374
1377 }
1379}
1380
1381template <typename DerivedCCG, typename FuncTy, typename CallTy>
1382uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1383 const DenseSet<uint32_t> &Node1Ids,
1384 const DenseSet<uint32_t> &Node2Ids) const {
1385 if (Node1Ids.size() < Node2Ids.size())
1386 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1387 else
1388 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1389}
1390
1391template <typename DerivedCCG, typename FuncTy, typename CallTy>
1392typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1393CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1394 CallInfo Call, const FuncTy *F) {
1396 ContextNode *AllocNode = createNewNode(true, F, Call);
1397 AllocationCallToContextNodeMap[Call] = AllocNode;
1398
1399 AllocNode->OrigStackOrAllocId = LastContextId;
1400
1401
1402 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1403
1404 return AllocNode;
1405}
1406
1408 if (!AllocTypes)
1409 return "None";
1410 std::string Str;
1412 Str += "NotCold";
1414 Str += "Cold";
1415 return Str;
1416}
1417
1418template <typename DerivedCCG, typename FuncTy, typename CallTy>
1419template <class NodeT, class IteratorT>
1420void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1421 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1424 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1425
1426
1427 if (AllocType == AllocationType::Hot)
1428 AllocType = AllocationType::NotCold;
1429
1430 ContextIdToAllocationType[++LastContextId] = AllocType;
1431
1432 bool IsImportant = false;
1433 if (!ContextSizeInfo.empty()) {
1434 auto &Entry = ContextIdToContextSizeInfos[LastContextId];
1435
1436
1438 uint64_t TotalCold = 0;
1439 for (auto &CSI : ContextSizeInfo)
1440 TotalCold += CSI.TotalSize;
1441
1442
1444
1445
1446 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1448
1449 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1450 TotalSizeToContextIdTopNCold.erase(
1451 TotalSizeToContextIdTopNCold.begin());
1452 assert(ImportantContextIdInfo.count(IdToRemove));
1453 ImportantContextIdInfo.erase(IdToRemove);
1454 }
1455 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1456 IsImportant = true;
1457 }
1458 }
1459 Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1460 }
1461
1462
1463 AllocNode->AllocTypes |= (uint8_t)AllocType;
1464
1465
1466
1467
1468 ContextNode *PrevNode = AllocNode;
1469
1470
1471
1472 SmallSet<uint64_t, 8> StackIdSet;
1473
1475 ContextIter != StackContext.end(); ++ContextIter) {
1476 auto StackId = getStackId(*ContextIter);
1477 if (IsImportant)
1478 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1479 ContextNode *StackNode = getNodeForStackId(StackId);
1480 if (!StackNode) {
1481 StackNode = createNewNode(false);
1482 StackEntryIdToContextNodeMap[StackId] = StackNode;
1483 StackNode->OrigStackOrAllocId = StackId;
1484 }
1485
1486
1488 auto Ins = StackIdSet.insert(StackId);
1489 if (!Ins.second)
1490 StackNode->Recursive = true;
1491 }
1492 StackNode->AllocTypes |= (uint8_t)AllocType;
1493 PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
1494 PrevNode = StackNode;
1495 }
1496}
1497
1498template <typename DerivedCCG, typename FuncTy, typename CallTy>
1499DenseSet<uint32_t>
1500CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1501 const DenseSet<uint32_t> &StackSequenceContextIds,
1502 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1503 DenseSet<uint32_t> NewContextIds;
1504 for (auto OldId : StackSequenceContextIds) {
1505 NewContextIds.insert(++LastContextId);
1506 OldToNewContextIds[OldId].insert(LastContextId);
1507 assert(ContextIdToAllocationType.count(OldId));
1508
1509 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1510 if (DotAllocContextIds.contains(OldId))
1511 DotAllocContextIds.insert(LastContextId);
1512 }
1513 return NewContextIds;
1514}
1515
1516template <typename DerivedCCG, typename FuncTy, typename CallTy>
1517void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1518 propagateDuplicateContextIds(
1519 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1520
1521 auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
1522 DenseSet<uint32_t> NewIds;
1523 for (auto Id : ContextIds)
1524 if (auto NewId = OldToNewContextIds.find(Id);
1525 NewId != OldToNewContextIds.end())
1527 return NewIds;
1528 };
1529
1530
1531 auto UpdateCallers = [&](ContextNode *Node,
1532 DenseSet<const ContextEdge *> &Visited,
1533 auto &&UpdateCallers) -> void {
1534 for (const auto &Edge : Node->CallerEdges) {
1537 continue;
1538 ContextNode *NextNode = Edge->Caller;
1539 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
1540
1541
1542 if (!NewIdsToAdd.empty()) {
1543 Edge->getContextIds().insert_range(NewIdsToAdd);
1544 UpdateCallers(NextNode, Visited, UpdateCallers);
1545 }
1546 }
1547 };
1548
1549 DenseSet<const ContextEdge *> Visited;
1550 for (auto &Entry : AllocationCallToContextNodeMap) {
1552 UpdateCallers(Node, Visited, UpdateCallers);
1553 }
1554}
1555
1556template <typename DerivedCCG, typename FuncTy, typename CallTy>
1557void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1558 ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
1559
1560
1561 DenseSet<uint32_t> RemainingContextIds) {
1562 auto &OrigEdges =
1563 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1564 DenseSet<uint32_t> RecursiveContextIds;
1565 DenseSet<uint32_t> AllCallerContextIds;
1567
1568
1569
1570 for (auto &CE : OrigEdges) {
1571 AllCallerContextIds.reserve(CE->getContextIds().size());
1572 for (auto Id : CE->getContextIds())
1573 if (!AllCallerContextIds.insert(Id).second)
1574 RecursiveContextIds.insert(Id);
1575 }
1576 }
1577
1578 for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1579 auto Edge = *EI;
1580 DenseSet<uint32_t> NewEdgeContextIds;
1581 DenseSet<uint32_t> NotFoundContextIds;
1582
1583
1584
1585 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1586 NotFoundContextIds);
1587
1588
1589 if (RecursiveContextIds.empty()) {
1590
1591
1592 RemainingContextIds.swap(NotFoundContextIds);
1593 } else {
1594
1595
1596
1597
1598
1599
1600
1601
1602 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1603 set_difference(NewEdgeContextIds, RecursiveContextIds);
1604 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1605 }
1606
1607 if (NewEdgeContextIds.empty()) {
1608 ++EI;
1609 continue;
1610 }
1611 if (TowardsCallee) {
1612 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1613 auto NewEdge = std::make_shared(
1614 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1615 NewNode->CalleeEdges.push_back(NewEdge);
1616 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1617 } else {
1618 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1619 auto NewEdge = std::make_shared(
1620 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1621 NewNode->CallerEdges.push_back(NewEdge);
1622 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1623 }
1624
1625 if (Edge->getContextIds().empty()) {
1626 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1627 continue;
1628 }
1629 ++EI;
1630 }
1631}
1632
1633template <typename DerivedCCG, typename FuncTy, typename CallTy>
1635 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1636
1637
1639 assert(!Edge->ContextIds.empty());
1640}
1641
1642template <typename DerivedCCG, typename FuncTy, typename CallTy>
1643static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1644 bool CheckEdges = true) {
1645 if (Node->isRemoved())
1646 return;
1647#ifndef NDEBUG
1648
1649 auto NodeContextIds = Node->getContextIds();
1650#endif
1651
1652
1653 if (Node->CallerEdges.size()) {
1655 Node->CallerEdges.front()->ContextIds);
1657 if (CheckEdges)
1659 set_union(CallerEdgeContextIds, Edge->ContextIds);
1660 }
1661
1662
1663
1664
1666 NodeContextIds == CallerEdgeContextIds ||
1667 set_is_subset(CallerEdgeContextIds, NodeContextIds));
1668 }
1669 if (Node->CalleeEdges.size()) {
1671 Node->CalleeEdges.front()->ContextIds);
1673 if (CheckEdges)
1675 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1676 }
1677
1678
1679
1681 NodeContextIds == CalleeEdgeContextIds);
1682 }
1683
1684
1685
1686#ifndef NDEBUG
1687
1688
1690 for (const auto &E : Node->CalleeEdges)
1693#endif
1694}
1695
1696template <typename DerivedCCG, typename FuncTy, typename CallTy>
1697void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1698 assignStackNodesPostOrder(ContextNode *Node,
1699 DenseSet<const ContextNode *> &Visited,
1700 DenseMap<uint64_t, std::vector>
1701 &StackIdToMatchingCalls,
1702 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1703 const DenseSet<uint32_t> &ImportantContextIds) {
1706 return;
1707
1708
1709
1710
1711 auto CallerEdges = Node->CallerEdges;
1712 for (auto &Edge : CallerEdges) {
1713
1714 if (Edge->isRemoved()) {
1716 continue;
1717 }
1718 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1719 CallToMatchingCall, ImportantContextIds);
1720 }
1721
1722
1723
1724
1725
1726
1727
1728 if (Node->IsAllocation ||
1729 !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
1730 return;
1731
1732 auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
1733
1734
1735
1736 if (Calls.size() == 1) {
1737 auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
1738 if (Ids.size() == 1) {
1739 assert(SavedContextIds.empty());
1740
1741 assert(Node == getNodeForStackId(Ids[0]));
1742 if (Node->Recursive)
1743 return;
1745 NonAllocationCallToContextNodeMap[Call] = Node;
1746 NodeToCallingFunc[Node] = Func;
1747 recordStackNode(Ids, Node, Node->getContextIds(), ImportantContextIds);
1748 return;
1749 }
1750 }
1751
1752#ifndef NDEBUG
1753
1754
1755 uint64_t LastId = Node->OrigStackOrAllocId;
1756 ContextNode *LastNode = getNodeForStackId(LastId);
1757
1759 assert(LastNode == Node);
1760#else
1761 ContextNode *LastNode = Node;
1762#endif
1763
1764
1765
1766 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1767
1768 [[maybe_unused]] bool PrevIterCreatedNode = false;
1769 bool CreatedNode = false;
1770 for (unsigned I = 0; I < Calls.size();
1771 I++, PrevIterCreatedNode = CreatedNode) {
1772 CreatedNode = false;
1773 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1774
1775
1776 if (SavedContextIds.empty()) {
1777
1778
1779
1780
1782 continue;
1783 auto MatchingCall = CallToMatchingCall[Call];
1784 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1785
1786
1787
1788 assert(I > 0 && !PrevIterCreatedNode);
1789 continue;
1790 }
1791 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1793 continue;
1794 }
1795
1796 assert(LastId == Ids.back());
1797
1798
1799
1800
1801
1802
1803
1804 set_intersect(SavedContextIds, LastNodeContextIds);
1805 ContextNode *PrevNode = LastNode;
1806 bool Skip = false;
1807
1808
1809 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1810 auto Id = *IdIter;
1811 ContextNode *CurNode = getNodeForStackId(Id);
1812
1813
1815 assert(!CurNode->Recursive);
1816
1817 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1818 if () {
1819 Skip = true;
1820 break;
1821 }
1822 PrevNode = CurNode;
1823
1824
1825
1827
1828
1829 if (SavedContextIds.empty()) {
1830 Skip = true;
1831 break;
1832 }
1833 }
1834 if (Skip)
1835 continue;
1836
1837
1838 ContextNode *NewNode = createNewNode(false, Func, Call);
1839 NonAllocationCallToContextNodeMap[Call] = NewNode;
1840 CreatedNode = true;
1841 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1842
1843 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1845
1846
1847
1848
1849 connectNewNode(NewNode, FirstNode, true, SavedContextIds);
1850
1851
1852
1853
1854 connectNewNode(NewNode, LastNode, false, SavedContextIds);
1855
1856
1857
1858 PrevNode = nullptr;
1859 for (auto Id : Ids) {
1860 ContextNode *CurNode = getNodeForStackId(Id);
1861
1863
1864
1865
1866 if (PrevNode) {
1867 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1868
1869
1870 if (!PrevEdge) {
1871 PrevNode = CurNode;
1872 continue;
1873 }
1874 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1875 if (PrevEdge->getContextIds().empty())
1876 removeEdgeFromGraph(PrevEdge);
1877 }
1878
1879
1880
1881 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1882 ? (uint8_t)AllocationType::None
1883 : CurNode->computeAllocType();
1884 PrevNode = CurNode;
1885 }
1886
1887 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1888
1891 for (auto Id : Ids) {
1892 ContextNode *CurNode = getNodeForStackId(Id);
1893
1896 }
1897 }
1898 }
1899}
1900
1901template <typename DerivedCCG, typename FuncTy, typename CallTy>
1902void CallsiteContextGraph<DerivedCCG, FuncTy,
1903 CallTy>::fixupImportantContexts() {
1904 if (ImportantContextIdInfo.empty())
1905 return;
1906
1907
1908 NumImportantContextIds = ImportantContextIdInfo.size();
1909
1911 return;
1912
1914 exportToDot("beforestackfixup");
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939 for (auto &[CurContextId, Info] : ImportantContextIdInfo) {
1940 if (Info.StackIdsToNode.empty())
1941 continue;
1943 ContextNode *PrevNode = nullptr;
1944 ContextNode *CurNode = nullptr;
1945 DenseSet<const ContextEdge *> VisitedEdges;
1946 ArrayRef<uint64_t> AllStackIds(Info.StackIds);
1947
1948
1949 for (unsigned I = 0; I < AllStackIds.size(); I++, PrevNode = CurNode) {
1950
1951
1952 auto Len = Info.MaxLength;
1953 auto LenToEnd = AllStackIds.size() - I;
1954 if (Len > LenToEnd)
1955 Len = LenToEnd;
1956 CurNode = nullptr;
1957
1958
1960
1961 auto CheckStackIds = AllStackIds.slice(I, Len);
1962 auto EntryIt = Info.StackIdsToNode.find(CheckStackIds);
1963 if (EntryIt == Info.StackIdsToNode.end())
1964 continue;
1965 CurNode = EntryIt->second;
1966
1967
1969 break;
1970 }
1971
1972
1973
1974
1975 if (!CurNode)
1976 break;
1977
1978
1979 if (!PrevNode)
1980 continue;
1981
1982 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1983 if (CurEdge) {
1984
1985 if (CurEdge->getContextIds().insert(CurContextId).second) {
1986 NumFixupEdgeIdsInserted++;
1988 }
1989 } else {
1990
1991 NumFixupEdgesAdded++;
1992 DenseSet<uint32_t> ContextIds({CurContextId});
1993 auto AllocType = computeAllocType(ContextIds);
1994 auto NewEdge = std::make_shared(
1995 PrevNode, CurNode, AllocType, std::move(ContextIds));
1996 PrevNode->CallerEdges.push_back(NewEdge);
1997 CurNode->CalleeEdges.push_back(NewEdge);
1998
1999 CurEdge = NewEdge.get();
2001 }
2002 VisitedEdges.insert(CurEdge);
2003
2004
2005 for (auto &Edge : PrevNode->CallerEdges) {
2006
2007
2008 if (Edge.get() != CurEdge && !VisitedEdges.contains(Edge.get()))
2009 Edge->getContextIds().erase(CurContextId);
2010 }
2011 }
2013 NumFixedContexts++;
2014 }
2015}
2016
2017template <typename DerivedCCG, typename FuncTy, typename CallTy>
2018void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2019
2020
2021
2022
2023
2024
2025
2026 DenseMap<uint64_t, std::vector> StackIdToMatchingCalls;
2027 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2028 for (auto &Call : CallsWithMetadata) {
2029
2030 if (AllocationCallToContextNodeMap.count(Call))
2031 continue;
2032 auto StackIdsWithContextNodes =
2033 getStackIdsWithContextNodesForCall(Call.call());
2034
2035
2036 if (StackIdsWithContextNodes.empty())
2037 continue;
2038
2039
2040 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2041 {Call.call(), StackIdsWithContextNodes, Func, {}});
2042 }
2043 }
2044
2045
2046
2047
2048
2049
2050
2051 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2052
2053
2054
2055 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2056 for (auto &It : StackIdToMatchingCalls) {
2057 auto &Calls = It.getSecond();
2058
2059 if (Calls.size() == 1) {
2060 auto &Ids = Calls[0].StackIds;
2061 if (Ids.size() == 1)
2062 continue;
2063 }
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2075 for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
2076 FuncToIndex.insert({CallCtxInfo.Func, Idx});
2078 Calls,
2079 [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
2080 return A.StackIds.size() > B.StackIds.size() ||
2081 (A.StackIds.size() == B.StackIds.size() &&
2082 (A.StackIds < B.StackIds ||
2083 (A.StackIds == B.StackIds &&
2084 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
2085 });
2086
2087
2088
2089
2090 uint64_t LastId = It.getFirst();
2091 ContextNode *LastNode = getNodeForStackId(LastId);
2092
2094
2095 if (LastNode->Recursive)
2096 continue;
2097
2098
2099
2100 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2102
2103#ifndef NDEBUG
2104
2105
2106
2107
2108 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2109#endif
2110
2111 for (unsigned I = 0; I < Calls.size(); I++) {
2112 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
2113 assert(SavedContextIds.empty());
2114 assert(LastId == Ids.back());
2115
2116#ifndef NDEBUG
2117
2118
2119 if (I > 0 && Ids != Calls[I - 1].StackIds)
2120 MatchingIdsFuncSet.clear();
2121#endif
2122
2123
2124
2125
2127 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2128
2129 ContextNode *PrevNode = LastNode;
2130 ContextNode *CurNode = LastNode;
2131 bool Skip = false;
2132
2133
2134
2135 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2136 auto Id = *IdIter;
2137 CurNode = getNodeForStackId(Id);
2138
2140
2141 if (CurNode->Recursive) {
2142 Skip = true;
2143 break;
2144 }
2145
2146 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156 if () {
2157 Skip = true;
2158 break;
2159 }
2160 PrevNode = CurNode;
2161
2162
2163
2165
2166
2167 if (StackSequenceContextIds.empty()) {
2168 Skip = true;
2169 break;
2170 }
2171 }
2172 if (Skip)
2173 continue;
2174
2175
2176
2177
2178
2179
2180 if (Ids.back() != getLastStackId(Call)) {
2181 for (const auto &PE : LastNode->CallerEdges) {
2182 set_subtract(StackSequenceContextIds, PE->getContextIds());
2183 if (StackSequenceContextIds.empty())
2184 break;
2185 }
2186
2187 if (StackSequenceContextIds.empty())
2188 continue;
2189 }
2190
2191#ifndef NDEBUG
2192
2193
2194
2195
2196
2198
2199 MatchingIdsFuncSet.insert(Func);
2200#endif
2201
2202
2203
2204
2205
2206 bool DuplicateContextIds = false;
2207 for (unsigned J = I + 1; J < Calls.size(); J++) {
2208 auto &CallCtxInfo = Calls[J];
2209 auto &NextIds = CallCtxInfo.StackIds;
2210 if (NextIds != Ids)
2211 break;
2212 auto *NextFunc = CallCtxInfo.Func;
2213 if (NextFunc != Func) {
2214
2215
2216 DuplicateContextIds = true;
2217 break;
2218 }
2219 auto &NextCall = CallCtxInfo.Call;
2220 CallToMatchingCall[NextCall] = Call;
2221
2222 I = J;
2223 }
2224
2225
2226
2227
2228
2229
2230
2231 OldToNewContextIds.reserve(OldToNewContextIds.size() +
2232 StackSequenceContextIds.size());
2233 SavedContextIds =
2234 DuplicateContextIds
2235 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2236 : StackSequenceContextIds;
2237 assert(!SavedContextIds.empty());
2238
2239 if (!DuplicateContextIds) {
2240
2241
2242
2243 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2244 if (LastNodeContextIds.empty())
2245 break;
2246 }
2247 }
2248 }
2249
2250
2251 propagateDuplicateContextIds(OldToNewContextIds);
2252
2254 check();
2255
2256
2257
2258
2259
2260
2261 DenseSet<const ContextNode *> Visited;
2263 ImportantContextIdInfo.keys());
2264 for (auto &Entry : AllocationCallToContextNodeMap)
2265 assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
2266 CallToMatchingCall, ImportantContextIds);
2267
2268 fixupImportantContexts();
2269
2271 check();
2272}
2273
2274uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
2275 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2277 return CallsiteContext.back();
2278}
2279
2280uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
2282 CallStack<CallsiteInfo, SmallVector::const_iterator>
2284
2285 return Index.getStackIdAtIndex(CallsiteContext.back());
2286}
2287
2289
2291
2292
2293 if (!CloneNo)
2294 return Base.str();
2296}
2297
2301
2302
2303
2304
2307 auto Pos = F.getName().find_last_of('.');
2309 unsigned CloneNo;
2310 bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2312 (void)Err;
2313 return CloneNo;
2314}
2315
2316std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
2317 const Instruction *Call,
2318 unsigned CloneNo) const {
2321 .str();
2322}
2323
2324std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
2325 const IndexCall &Call,
2326 unsigned CloneNo) const {
2327 auto VI = FSToVIMap.find(Func);
2328 assert(VI != FSToVIMap.end());
2331 return CallerName + " -> alloc";
2332 else {
2334 return CallerName + " -> " +
2336 Callsite->Clones[CloneNo]);
2337 }
2338}
2339
2340std::vector<uint64_t>
2341ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2342 Instruction *Call) {
2343 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2345 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2346 CallsiteContext);
2347}
2348
2349std::vector<uint64_t>
2350IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
2352 CallStack<CallsiteInfo, SmallVector::const_iterator>
2354 return getStackIdsWithContextNodes<CallsiteInfo,
2355 SmallVector::const_iterator>(
2356 CallsiteContext);
2357}
2358
2359template <typename DerivedCCG, typename FuncTy, typename CallTy>
2360template <class NodeT, class IteratorT>
2361std::vector<uint64_t>
2362CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2363 CallStack<NodeT, IteratorT> &CallsiteContext) {
2364 std::vector<uint64_t> StackIds;
2365 for (auto IdOrIndex : CallsiteContext) {
2366 auto StackId = getStackId(IdOrIndex);
2367 ContextNode *Node = getNodeForStackId(StackId);
2368 if (!Node)
2369 break;
2370 StackIds.push_back(StackId);
2371 }
2372 return StackIds;
2373}
2374
2375ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2377 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2378 : Mod(M), OREGetter(OREGetter) {
2379
2380
2381
2382 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2383 for (auto &F : M) {
2384 std::vector CallsWithMetadata;
2385 for (auto &BB : F) {
2386 for (auto &I : BB) {
2388 continue;
2389 if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
2390 CallsWithMetadata.push_back(&I);
2391 auto *AllocNode = addAllocNode(&I, &F);
2392 auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
2395
2396 for (auto &MDOp : MemProfMD->operands()) {
2398 std::vector ContextSizeInfo;
2399
2400 if (MIBMD->getNumOperands() > 2) {
2401 for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
2402 MDNode *ContextSizePair =
2407 ->getZExtValue();
2410 ->getZExtValue();
2411 ContextSizeInfo.push_back({FullStackId, TotalSize});
2412 }
2413 }
2417 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2418 AllocNode, StackContext, CallsiteContext,
2420 TotalSizeToContextIdTopNCold);
2421 }
2422
2423
2425 DotAllocContextIds = AllocNode->getContextIds();
2427
2428
2429 I.setMetadata(LLVMContext::MD_memprof, nullptr);
2430 I.setMetadata(LLVMContext::MD_callsite, nullptr);
2431 }
2432
2433 else if (I.getMetadata(LLVMContext::MD_callsite)) {
2434 CallsWithMetadata.push_back(&I);
2435 }
2436 }
2437 }
2438 if (!CallsWithMetadata.empty())
2439 FuncToCallsWithMetadata[&F] = CallsWithMetadata;
2440 }
2441
2443 dbgs() << "CCG before updating call stack chains:\n";
2444 dbgs() << *this;
2445 }
2446
2448 exportToDot("prestackupdate");
2449
2450 updateStackNodes();
2451
2453 exportToDot("poststackupdate");
2454
2455 handleCallsitesWithMultipleTargets();
2456
2457 markBackedges();
2458
2459
2460 for (auto &FuncEntry : FuncToCallsWithMetadata)
2461 for (auto &Call : FuncEntry.second)
2462 Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
2463}
2464
2465IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2468 isPrevailing)
2469 : Index(Index), isPrevailing(isPrevailing) {
2470
2471
2472
2473 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2474 for (auto &I : Index) {
2475 auto VI = Index.getValueInfo(I);
2476 for (auto &S : VI.getSummaryList()) {
2477
2478
2479
2480
2481
2482
2483
2485 !isPrevailing(VI.getGUID(), S.get()))
2486 continue;
2488 if (!FS)
2489 continue;
2490 std::vector CallsWithMetadata;
2491 if (->allocs().empty()) {
2492 for (auto &AN : FS->mutableAllocs()) {
2493
2494
2495
2496
2497 if (AN.MIBs.empty())
2498 continue;
2499 IndexCall AllocCall(&AN);
2500 CallsWithMetadata.push_back(AllocCall);
2501 auto *AllocNode = addAllocNode(AllocCall, FS);
2502
2503
2504
2506 EmptyContext;
2507 unsigned I = 0;
2509 AN.ContextSizeInfos.size() == AN.MIBs.size());
2510
2511 for (auto &MIB : AN.MIBs) {
2513 StackContext(&MIB);
2514 std::vector ContextSizeInfo;
2515 if (!AN.ContextSizeInfos.empty()) {
2516 for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I])
2517 ContextSizeInfo.push_back({FullStackId, TotalSize});
2518 }
2519 addStackNodesForMIB<MIBInfo, SmallVector::const_iterator>(
2520 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2521 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2522 I++;
2523 }
2524
2525
2527 DotAllocContextIds = AllocNode->getContextIds();
2529
2530
2531
2532
2533 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2534 }
2535 }
2536
2537 if (->callsites().empty())
2538 for (auto &SN : FS->mutableCallsites()) {
2539 IndexCall StackNodeCall(&SN);
2540 CallsWithMetadata.push_back(StackNodeCall);
2541 }
2542
2543 if (!CallsWithMetadata.empty())
2544 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
2545
2546 if (->allocs().empty() ||
->callsites().empty())
2547 FSToVIMap[FS] = VI;
2548 }
2549 }
2550
2552 dbgs() << "CCG before updating call stack chains:\n";
2553 dbgs() << *this;
2554 }
2555
2557 exportToDot("prestackupdate");
2558
2559 updateStackNodes();
2560
2562 exportToDot("poststackupdate");
2563
2564 handleCallsitesWithMultipleTargets();
2565
2566 markBackedges();
2567}
2568
2569template <typename DerivedCCG, typename FuncTy, typename CallTy>
2570void CallsiteContextGraph<DerivedCCG, FuncTy,
2571 CallTy>::handleCallsitesWithMultipleTargets() {
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2585
2586 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2587 for (auto &Entry : NonAllocationCallToContextNodeMap) {
2588 auto *Node = Entry.second;
2590
2591
2592
2593
2594
2595
2596
2597 std::vector AllCalls;
2598 AllCalls.reserve(Node->MatchingCalls.size() + 1);
2599 AllCalls.push_back(Node->Call);
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613 if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
2614 continue;
2615
2616 auto It = AllCalls.begin();
2617
2618 for (; It != AllCalls.end(); ++It) {
2620 bool Match = true;
2621 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
2622 ++EI) {
2623 auto Edge = *EI;
2624 if (!Edge->Callee->hasCall())
2625 continue;
2626 assert(NodeToCallingFunc.count(Edge->Callee));
2627
2628 if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
2629 Match = false;
2630 break;
2631 }
2632 }
2633
2634 if (Match) {
2635
2636
2637 if (Node->Call != ThisCall) {
2638 Node->setCall(ThisCall);
2639
2640
2641
2643 }
2644 break;
2645 }
2646 }
2647
2648
2649 Node->MatchingCalls.clear();
2650
2651
2652 if (It == AllCalls.end()) {
2653 RemovedEdgesWithMismatchedCallees++;
2654
2655
2656
2657 Node->setCall(CallInfo());
2658 continue;
2659 }
2660
2661
2662 for (++It; It != AllCalls.end(); ++It) {
2664 if (!sameCallee(Node->Call.call(), ThisCall.call()))
2665 continue;
2666 Node->MatchingCalls.push_back(ThisCall);
2667 }
2668 }
2669
2670
2671
2672
2673
2674
2675 NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
2676 return !it.second->hasCall() || it.second->Call != it.first;
2677 });
2678
2679
2680 for (auto &[Call, Node] : NewCallToNode)
2681 NonAllocationCallToContextNodeMap[Call] = Node;
2682
2683
2684
2685 for (auto &[Call, Node] : TailCallToContextNodeMap)
2686 NonAllocationCallToContextNodeMap[Call] = Node;
2687}
2688
2689template <typename DerivedCCG, typename FuncTy, typename CallTy>
2690bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2692 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2693
2694
2695
2696 struct CallsWithSameCallee {
2697 std::vector Calls;
2698 ContextNode *Node = nullptr;
2699 };
2700
2701
2702
2704 for (auto ThisCall : AllCalls) {
2705 auto *F = getCalleeFunc(ThisCall.call());
2706 if (F)
2707 CalleeFuncToCallInfo[F].Calls.push_back(ThisCall);
2708 }
2709
2710
2711
2712
2713
2714
2716 for (const auto &Edge : Node->CalleeEdges) {
2717 if (!Edge->Callee->hasCall())
2718 continue;
2719 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2720 if (CalleeFuncToCallInfo.contains(ProfiledCalleeFunc))
2721 CalleeNodeToCallInfo[Edge->Callee] =
2722 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2723 }
2724
2725
2726
2727
2728 if (CalleeNodeToCallInfo.empty())
2729 return false;
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740 ContextNode *UnmatchedCalleesNode = nullptr;
2741
2742 bool UsedOrigNode = false;
2744
2745
2746
2747 auto CalleeEdges = Node->CalleeEdges;
2748 for (auto &Edge : CalleeEdges) {
2749 if (!Edge->Callee->hasCall())
2750 continue;
2751
2752
2753
2754 ContextNode *CallerNodeToUse = nullptr;
2755
2756
2757
2758 if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
2759 if (!UnmatchedCalleesNode)
2760 UnmatchedCalleesNode =
2761 createNewNode(false, NodeToCallingFunc[Node]);
2762 CallerNodeToUse = UnmatchedCalleesNode;
2763 } else {
2764
2765
2766 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2767 if (->Node) {
2768
2769 if (!UsedOrigNode) {
2771
2772 Node->MatchingCalls.clear();
2773 UsedOrigNode = true;
2774 } else
2775 Info->Node =
2776 createNewNode(false, NodeToCallingFunc[Node]);
2778
2779
2780 Info->Node->setCall(Info->Calls.front());
2783
2784
2785
2786 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2787 }
2788 CallerNodeToUse = Info->Node;
2789 }
2790
2791
2792 if (CallerNodeToUse == Node)
2793 continue;
2794
2795 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2796 }
2797
2798
2799
2800
2801
2802 for (auto &I : CalleeNodeToCallInfo)
2803 removeNoneTypeCallerEdges(I.second->Node);
2804 if (UnmatchedCalleesNode)
2805 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2806 removeNoneTypeCallerEdges(Node);
2807
2808 return true;
2809}
2810
2811uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
2812
2813 return IdOrIndex;
2814}
2815
2816uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
2817
2818
2819 return Index.getStackIdAtIndex(IdOrIndex);
2820}
2821
// calleesMatch: checks whether the callee of \p Call matches the function of
// the profiled callee node reached by edge *EI. If the match only holds via a
// chain of (one or more) tail calls discovered by calleeMatchesFunc, this
// synthesizes context nodes/edges for each intermediate tail call, splices
// them between Edge->Caller and Edge->Callee, and removes the original edge.
// \p EI is kept positioned so the caller's edge iteration remains valid.
// NOTE(review): several original lines (2860 assert operand, 2894, 2900-2904,
// template argument of make_shared at 2851) were elided by extraction —
// verify against upstream before applying.
2822template <typename DerivedCCG, typename FuncTy, typename CallTy>
2823bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2824 CallTy Call, EdgeIter &EI,
2825 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2826 auto Edge = *EI;
2827 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2828 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2829
 // Ask the derived graph whether the IR/summary callee matches the profiled
 // callee, possibly filling FoundCalleeChain with an interposed tail-call
 // chain.
2830
2831 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2832 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2833 FoundCalleeChain))
2834 return false;
2835
 // Direct match (no tail calls in between): nothing further to fix up.
2836
2837 if (FoundCalleeChain.empty())
2838 return true;
2839
 // Helper that adds (or merges into) an edge Caller->Callee carrying the
 // original edge's context ids and alloc types. When inserting into the
 // caller currently being iterated, it restores EI's position.
2840 auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {
2841 auto *CurEdge = Callee->findEdgeFromCaller(Caller);
2842
 // An edge already exists: just fold in the ids/types.
2843
2844 if (CurEdge) {
2845 CurEdge->ContextIds.insert_range(Edge->ContextIds);
2846 CurEdge->AllocTypes |= Edge->AllocTypes;
2847 return;
2848 }
2849
 // Otherwise create a new edge (template argument elided by extraction).
2850
2851 auto NewEdge = std::make_shared(
2852 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2853 Callee->CallerEdges.push_back(NewEdge);
2854 if (Caller == Edge->Caller) {
2855
2856
 // Insert before EI then advance so EI still refers to the original edge.
2857
2858 EI = Caller->CalleeEdges.insert(EI, NewEdge);
2859 ++EI;
2861 "Iterator position not restored after insert and increment");
2862 } else
2863 Caller->CalleeEdges.push_back(NewEdge);
2864 };
2865
 // Create (or reuse) a context node per discovered tail call, wiring each to
 // the previous node in the chain, starting from the profiled callee node.
2866
2867
2868 auto *CurCalleeNode = Edge->Callee;
2869 for (auto &[NewCall, Func] : FoundCalleeChain) {
2870 ContextNode *NewNode = nullptr;
2871
 // Reuse an existing synthesized node for this tail call if present.
2872 if (TailCallToContextNodeMap.count(NewCall)) {
2873 NewNode = TailCallToContextNodeMap[NewCall];
2874 NewNode->AllocTypes |= Edge->AllocTypes;
2875 } else {
2876 FuncToCallsWithMetadata[Func].push_back({NewCall});
2877
2878 NewNode = createNewNode(false, Func, NewCall);
2879 TailCallToContextNodeMap[NewCall] = NewNode;
2880 NewNode->AllocTypes = Edge->AllocTypes;
2881 }
2882
2883
2884 AddEdge(NewNode, CurCalleeNode);
2885
2886 CurCalleeNode = NewNode;
2887 }
2888
 // Finally connect the original caller to the top of the tail-call chain.
2889
2890 AddEdge(Edge->Caller, CurCalleeNode);
2891
2892#ifndef NDEBUG
 // (Debug-only consistency check elided by extraction at original 2893-2894.)
2893
2895#endif
2896
 // Remove the now-replaced direct edge; EI is adjusted by the removal and
 // then decremented so the caller's loop increment lands correctly.
2897
2898 removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);
2899
2900
2901
2902
2903
2905 --EI;
2906
2907 return true;
2908}
2909
// findProfiledCalleeThroughTailCalls (IR variant): starting from CurCallee,
// walks tail calls (depth-first, Depth tracks recursion) looking for a
// *unique* chain reaching ProfiledCallee. On success the chain's callsites
// are appended to FoundCalleeChain; if more than one chain is found,
// FoundMultipleCalleeChains is set and false is returned.
// NOTE(review): the depth limit check (orig 2916), SaveCallsiteInfo lambda
// header (2919), CalleeFunc dyn_cast (2923), and several alias-handling lines
// (2925-2928, 2946, 2950, 2953) were elided by extraction — verify upstream.
2910bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2911 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
2912 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2913 bool &FoundMultipleCalleeChains) {
2914
 // Bail out when the (elided) search-depth limit is exceeded.
2915
2917 return false;
2918
 // (Elided) lambda that records a discovered chain element.
2920 FoundCalleeChain.push_back({Callsite, F});
2921 };
2922
2924 if (!CalleeFunc) {
2929 }
2930
 // Scan every instruction of the current callee for tail calls; recurse on
 // each, requiring that exactly one chain reaches the profiled callee.
2931
2932
2933
2934
2935 bool FoundSingleCalleeChain = false;
2936 for (auto &BB : *CalleeFunc) {
2937 for (auto &I : BB) {
2939 if (!CB || !CB->isTailCall())
2940 continue;
2941 auto *CalledValue = CB->getCalledOperand();
2942 auto *CalledFunction = CB->getCalledFunction();
2943 if (CalledValue && !CalledFunction) {
2944 CalledValue = CalledValue->stripPointerCasts();
 // (Elided) re-check for a direct function after stripping casts.
2945
2947 }
2948
 // (Elided) alias handling: resolve GlobalAlias to its aliasee.
2949
2951 assert(!CalledFunction &&
2952 "Expected null called function in callsite for alias");
2954 }
2955 if (!CalledFunction)
2956 continue;
2957 if (CalledFunction == ProfiledCallee) {
 // Found a direct chain terminus; a second one makes the result
 // ambiguous and aborts the search.
2958 if (FoundSingleCalleeChain) {
2959 FoundMultipleCalleeChains = true;
2960 return false;
2961 }
2962 FoundSingleCalleeChain = true;
2963 FoundProfiledCalleeCount++;
2964 FoundProfiledCalleeDepth += Depth;
2965 if (Depth > FoundProfiledCalleeMaxDepth)
2966 FoundProfiledCalleeMaxDepth = Depth;
2967 SaveCallsiteInfo(&I, CalleeFunc);
2968 } else if (findProfiledCalleeThroughTailCalls(
2969 ProfiledCallee, CalledFunction, Depth + 1,
2970 FoundCalleeChain, FoundMultipleCalleeChains)) {
2971
 // The recursion only returns true when it found exactly one chain.
2972
2973 assert(!FoundMultipleCalleeChains);
2974 if (FoundSingleCalleeChain) {
2975 FoundMultipleCalleeChains = true;
2976 return false;
2977 }
2978 FoundSingleCalleeChain = true;
2979 SaveCallsiteInfo(&I, CalleeFunc);
2980 } else if (FoundMultipleCalleeChains)
2981 return false;
2982 }
2983 }
2984
2985 return FoundSingleCalleeChain;
2986}
2987
// getCalleeFunc (IR variant): returns the direct callee Function of \p Call,
// looking through pointer casts; returns nullptr for indirect calls.
// NOTE(review): the CallBase cast line (orig 2989), the alias dyn_cast (2993)
// and the return statements (2995-2996) were elided by extraction.
2988const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *Call) {
2990 if (!CB->getCalledOperand() || CB->isIndirectCall())
2991 return nullptr;
2992 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
 // (Elided) if CalleeVal is a GlobalAlias, return its aliasee instead.
2994 if (Alias)
2997}
2998
// calleeMatchesFunc (IR variant): returns true when \p Call's callee is
// \p Func (directly, via alias, or via a unique tail-call chain discovered by
// findProfiledCalleeThroughTailCalls, which then fills FoundCalleeChain).
// NOTE(review): the CallBase/Function/Alias cast lines (orig 3002, 3006,
// 3009) were elided by extraction — verify upstream before applying.
2999bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3000 Instruction *Call, const Function *Func, const Function *CallerFunc,
3001 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3003 if (!CB->getCalledOperand() || CB->isIndirectCall())
3004 return false;
3005 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
 // Direct match, or a match through a GlobalAlias to Func.
3007 if (CalleeFunc == Func)
3008 return true;
3010 if (Alias && Alias->getAliasee() == Func)
3011 return true;
3012
 // Otherwise attempt to find a unique chain of tail calls from the actual
 // callee to the profiled callee Func.
3013
3014
3015
3016
3017
3018
3019
3020 unsigned Depth = 1;
3021 bool FoundMultipleCalleeChains = false;
3022 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
3023 FoundCalleeChain,
3024 FoundMultipleCalleeChains)) {
3025 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
3026 << Func->getName() << " from " << CallerFunc->getName()
3027 << " that actually called " << CalleeVal->getName()
3028 << (FoundMultipleCalleeChains
3029 ? " (found multiple possible chains)"
3030 : "")
3031 << "\n");
3032 if (FoundMultipleCalleeChains)
3033 FoundProfiledCalleeNonUniquelyCount++;
3034 return false;
3035 }
3036
3037 return true;
3038}
3039
// sameCallee (IR variant): true when both direct calls resolve (through
// pointer casts) to the same callee Function; indirect calls never match.
// NOTE(review): the CallBase casts (orig 3042, 3047) and the Function
// dyn_casts producing CalleeFunc1/2 (3046, 3051) were elided by extraction.
3040bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3041 Instruction *Call2) {
3043 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3044 return false;
3045 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3048 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3049 return false;
3050 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3052 return CalleeFunc1 == CalleeFunc2;
3053}
3054
// findProfiledCalleeThroughTailCalls (summary-index variant): mirrors the IR
// variant but walks FunctionSummary call edges that have the tail-call flag,
// synthesizing CallsiteInfo records (cached per (FS, Callee)) for each chain
// element and recording the FS -> ValueInfo mapping in FSToVIMap.
// NOTE(review): the depth-limit check (orig 3061), summary-list loop header
// (3083-3084), FS dyn_cast (3087), and alias dyn_cast (3091) were elided by
// extraction — verify upstream before applying.
3055bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3056 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
3057 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3058 bool &FoundMultipleCalleeChains) {
3059
 // Bail out when the (elided) search-depth limit is exceeded.
3060
3062 return false;
3063
 // Create (or reuse a cached) synthesized CallsiteInfo for this FS/Callee
 // pair and append it to the discovered chain.
3064 auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
3065
3066
3067 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3068 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3069
3070
3071 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
3072 std::make_unique(Callee, SmallVector());
3073 CallsiteInfo *NewCallsiteInfo =
3074 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
3075 FoundCalleeChain.push_back({NewCallsiteInfo, FS});
3076 };
3077
 // Iterate the prevailing summaries of CurCallee, following tail-call edges
 // and requiring that exactly one chain reaches ProfiledCallee.
3078
3079
3080
3081
3082 bool FoundSingleCalleeChain = false;
3085 !isPrevailing(CurCallee.getGUID(), S.get()))
3086 continue;
3088 if (!FS)
3089 continue;
3090 auto FSVI = CurCallee;
 // If the summary is an alias, record the aliasee's ValueInfo instead.
3092 if (AS)
3093 FSVI = AS->getAliaseeVI();
3094 for (auto &CallEdge : FS->calls()) {
3095 if (!CallEdge.second.hasTailCall())
3096 continue;
3097 if (CallEdge.first == ProfiledCallee) {
 // Direct terminus; a second one makes the result ambiguous.
3098 if (FoundSingleCalleeChain) {
3099 FoundMultipleCalleeChains = true;
3100 return false;
3101 }
3102 FoundSingleCalleeChain = true;
3103 FoundProfiledCalleeCount++;
3104 FoundProfiledCalleeDepth += Depth;
3105 if (Depth > FoundProfiledCalleeMaxDepth)
3106 FoundProfiledCalleeMaxDepth = Depth;
3107 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3108
 // Each FS should map to a single ValueInfo.
3109 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3110 FSToVIMap[FS] = FSVI;
3111 } else if (findProfiledCalleeThroughTailCalls(
3112 ProfiledCallee, CallEdge.first, Depth + 1,
3113 FoundCalleeChain, FoundMultipleCalleeChains)) {
3114
 // The recursion only returns true for a unique chain.
3115
3116 assert(!FoundMultipleCalleeChains);
3117 if (FoundSingleCalleeChain) {
3118 FoundMultipleCalleeChains = true;
3119 return false;
3120 }
3121 FoundSingleCalleeChain = true;
3122 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3123
3124 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3125 FSToVIMap[FS] = FSVI;
3126 } else if (FoundMultipleCalleeChains)
3127 return false;
3128 }
3129 }
3130
3131 return FoundSingleCalleeChain;
3132}
3133
// getCalleeFunc (summary-index variant): returns the FunctionSummary of the
// call's callee ValueInfo, or nullptr when it has no summaries.
// NOTE(review): the Callee extraction line (orig 3136) and the return of the
// selected summary (3139) were elided by extraction.
3134const FunctionSummary *
3135IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
3137 if (Callee.getSummaryList().empty())
3138 return nullptr;
3140}
3141
// calleeMatchesFunc (summary-index variant): true when the call's callee
// ValueInfo matches Func's recorded ValueInfo (directly, via alias, or via a
// unique tail-call chain found through summaries).
// NOTE(review): the Callee extraction (orig 3146), the alias summary access
// (3152), and the alias-comparison operand (3159) were elided by extraction.
3142bool IndexCallsiteContextGraph::calleeMatchesFunc(
3143 IndexCall &Call, const FunctionSummary *Func,
3144 const FunctionSummary *CallerFunc,
3145 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3147
 // Resolve a possible AliasSummary on the callee so an alias to Func also
 // counts as a match.
3148
3149 AliasSummary *Alias =
3150 Callee.getSummaryList().empty()
3151 ? nullptr
 // FSToVIMap must already know Func (populated during graph build /
 // tail-call discovery).
3153 assert(FSToVIMap.count(Func));
3154 auto FuncVI = FSToVIMap[Func];
3155 if (Callee == FuncVI ||
3156
3157
3158
3160 return true;
3161
 // Otherwise attempt to find a unique chain of tail calls from the actual
 // callee to FuncVI.
3162
3163
3164
3165
3166
3167
3168
3169 unsigned Depth = 1;
3170 bool FoundMultipleCalleeChains = false;
3171 if (!findProfiledCalleeThroughTailCalls(
3172 FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3173 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
3174 << " from " << FSToVIMap[CallerFunc]
3175 << " that actually called " << Callee
3176 << (FoundMultipleCalleeChains
3177 ? " (found multiple possible chains)"
3178 : "")
3179 << "\n");
3180 if (FoundMultipleCalleeChains)
3181 FoundProfiledCalleeNonUniquelyCount++;
3182 return false;
3183 }
3184
3185 return true;
3186}
3187
// sameCallee (summary-index variant): compares the two calls' callee
// ValueInfos for equality.
// NOTE(review): the Callee1/Callee2 extraction lines (orig 3189-3190) were
// elided by extraction.
3188bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3191 return Callee1 == Callee2;
3192}
3193
// Debug helper: prints this node to dbgs() followed by a newline.
// NOTE(review): the print(dbgs()) call (orig 3197) was elided by extraction.
3194template <typename DerivedCCG, typename FuncTy, typename CallTy>
3195void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3196 const {
3198 dbgs() << "\n";
3199}
3200
// Prints a human-readable description of this context node: its call,
// matching calls, NodeId, sorted context ids, callee/caller edges, and clone
// relationships.
// NOTE(review): original lines 3219 (likely the AllocTypes print), 3240 and
// 3242 (the `if (!First)` / `First = false;` pair of the clone-list
// separator) were elided by extraction — verify upstream before applying.
3201template <typename DerivedCCG, typename FuncTy, typename CallTy>
3202void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3203 raw_ostream &OS) const {
3204 OS << "Node " << this << "\n";
3205 OS << "\t";
3206 printCall(OS);
3207 if (Recursive)
3208 OS << " (recursive)";
3209 OS << "\n";
3210 if (!MatchingCalls.empty()) {
3211 OS << "\tMatchingCalls:\n";
3212 for (auto &MatchingCall : MatchingCalls) {
3213 OS << "\t";
3214 MatchingCall.print(OS);
3215 OS << "\n";
3216 }
3217 }
3218 OS << "\tNodeId: " << NodeId << "\n";
3220 OS << "\tContextIds:";
3221
 // Sort ids for deterministic output.
3222 auto ContextIds = getContextIds();
3223 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3224 std::sort(SortedIds.begin(), SortedIds.end());
3225 for (auto Id : SortedIds)
3226 OS << " " << Id;
3227 OS << "\n";
3228 OS << "\tCalleeEdges:\n";
3229 for (auto &Edge : CalleeEdges)
3230 OS << "\t\t" << *Edge << " (Callee NodeId: " << Edge->Callee->NodeId
3231 << ")\n";
3232 OS << "\tCallerEdges:\n";
3233 for (auto &Edge : CallerEdges)
3234 OS << "\t\t" << *Edge << " (Caller NodeId: " << Edge->Caller->NodeId
3235 << ")\n";
3236 if (!Clones.empty()) {
3237 OS << "\tClones: ";
3238 bool First = true;
3239 for (auto *C : Clones) {
3241 OS << ", ";
3243 OS << C << " NodeId: " << C->NodeId;
3244 }
3245 OS << "\n";
3246 } else if (CloneOf) {
3247 OS << "\tClone of " << CloneOf << " NodeId: " << CloneOf->NodeId << "\n";
3248 }
3249}
3250
// Debug helper: prints this edge to dbgs() followed by a newline.
// NOTE(review): the print(dbgs()) call (orig 3254) was elided by extraction.
3251template <typename DerivedCCG, typename FuncTy, typename CallTy>
3252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3253 const {
3255 dbgs() << "\n";
3256}
3257
// Prints this edge: callee/caller node pointers, a "(BE)" marker for
// backedges, and the sorted context ids it carries.
// NOTE(review): original line 3263 (likely the AllocTypes portion of the
// stream expression) was elided by extraction.
3258template <typename DerivedCCG, typename FuncTy, typename CallTy>
3259void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3260 raw_ostream &OS) const {
3261 OS << "Edge from Callee " << Callee << " to Caller: " << Caller
3262 << (IsBackedge ? " (BE)" : "")
3264 OS << " ContextIds:";
3265 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3266 std::sort(SortedIds.begin(), SortedIds.end());
3267 for (auto Id : SortedIds)
3268 OS << " " << Id;
3269}
3270
// Debug helper: dumps the whole graph to dbgs().
// NOTE(review): the print(dbgs()) call (orig 3273) was elided by extraction.
3271template <typename DerivedCCG, typename FuncTy, typename CallTy>
3272void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
3274}
3275
// Prints every live (non-removed) node of the graph.
// NOTE(review): the node-iteration loop header (orig 3281, presumably over
// nodes<GraphType>(this)) was elided by extraction.
3276template <typename DerivedCCG, typename FuncTy, typename CallTy>
3277void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3278 raw_ostream &OS) const {
3279 OS << "Callsite Context Graph:\n";
3280 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3282 if (Node->isRemoved())
3283 continue;
3284 Node->print(OS);
3285 OS << "\n";
3286 }
3287}
3288
// Emits a "MemProf hinting" report line per context size info of each live
// allocation node: full stack id, total size, the context's profiled
// allocation type, and whether the hint was changed due to cold byte percent.
// NOTE(review): the node-iteration loop header (orig 3293), the operand of
// `if (->IsAllocation)` (3296), and parts of the stream expressions
// (3309, 3312, 3314) were elided by extraction — verify upstream.
3289template <typename DerivedCCG, typename FuncTy, typename CallTy>
3290void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3291 raw_ostream &OS) const {
3292 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3294 if (Node->isRemoved())
3295 continue;
 // Only allocation nodes carry size info.
3296 if (->IsAllocation)
3297 continue;
3298 DenseSet<uint32_t> ContextIds = Node->getContextIds();
3299 auto AllocTypeFromCall = getAllocationCallType(Node->Call);
3300 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3301 std::sort(SortedIds.begin(), SortedIds.end());
3302 for (auto Id : SortedIds) {
3303 auto TypeI = ContextIdToAllocationType.find(Id);
3304 assert(TypeI != ContextIdToAllocationType.end());
3305 auto CSI = ContextIdToContextSizeInfos.find(Id);
3306 if (CSI != ContextIdToContextSizeInfos.end()) {
3307 for (auto &Info : CSI->second) {
3308 OS << "MemProf hinting: "
3310 << " full allocation context " << Info.FullStackId
3311 << " with total size " << Info.TotalSize << " is "
 // Note when the attribute on the call differs from the node's
 // computed type (e.g. changed due to cold byte percent).
3313 if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
3315 << " due to cold byte percent";
3316
3317 OS << " (context id " << Id << ")";
3318 OS << "\n";
3319 }
3320 }
3321 }
3322 }
3323}
3324
// Graph consistency check: iterates nodes and their caller edges running the
// (elided) per-node/per-edge validators.
// NOTE(review): the node loop header (orig 3328-3329) and the checkNode /
// checkEdge calls (3331) were elided by extraction — verify upstream.
3325template <typename DerivedCCG, typename FuncTy, typename CallTy>
3326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
3327 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3330 for (auto &Edge : Node->CallerEdges)
3332 }
3333}
3334
3335template <typename DerivedCCG, typename FuncTy, typename CallTy>
3336struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
3337 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3338 using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3339
3340 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3342
3346
3350
3354
3356 return G->NodeOwner.begin()->get();
3357 }
3358
3359 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3360 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3362 return P->Callee;
3363 }
3364
3366 mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
3369
3373
3377};
3378
3379template <typename DerivedCCG, typename FuncTy, typename CallTy>
3383
3384
3385
3386
3387 DoHighlight =
3391 }
3392
3393 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3397
3399 std::string LabelString =
3400 (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
3401 Twine(Node->OrigStackOrAllocId) + " NodeId: " + Twine(Node->NodeId))
3402 .str();
3403 LabelString += "\n";
3404 if (Node->hasCall()) {
3405 auto Func = G->NodeToCallingFunc.find(Node);
3406 assert(Func != G->NodeToCallingFunc.end());
3407 LabelString +=
3408 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
3409 } else {
3410 LabelString += "null call";
3411 if (Node->Recursive)
3412 LabelString += " (recursive)";
3413 else
3414 LabelString += " (external)";
3415 }
3416 return LabelString;
3417 }
3418
3420 auto ContextIds = Node->getContextIds();
3421
3422
3423
3424 bool Highlight = false;
3425 if (DoHighlight) {
3430 else
3431 Highlight = set_intersects(ContextIds, G->DotAllocContextIds);
3432 }
3433 std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
3434 getContextIds(ContextIds) + "\"")
3435 .str();
3436
3437 if (Highlight)
3438 AttributeString += ",fontsize=\"30\"";
3439 AttributeString +=
3440 (Twine(",fillcolor=\"") + getColor(Node->AllocTypes, Highlight) + "\"")
3441 .str();
3442 if (Node->CloneOf) {
3443 AttributeString += ",color=\"blue\"";
3444 AttributeString += ",style=\"filled,bold,dashed\"";
3445 } else
3446 AttributeString += ",style=\"filled\"";
3447 return AttributeString;
3448 }
3449
3452 auto &Edge = *(ChildIter.getCurrent());
3453
3454
3455
3456
3457 bool Highlight = false;
3458 if (DoHighlight) {
3463 else
3464 Highlight = set_intersects(Edge->ContextIds, G->DotAllocContextIds);
3465 }
3466 auto Color = getColor(Edge->AllocTypes, Highlight);
3467 std::string AttributeString =
3468 (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
3469
3470 Twine(",fillcolor=\"") + Color + "\"" + Twine(",color=\"") + Color +
3471 "\"")
3472 .str();
3473 if (Edge->IsBackedge)
3474 AttributeString += ",style=\"dotted\"";
3475
3476 if (Highlight)
3477 AttributeString += ",penwidth=\"2.0\",weight=\"2\"";
3478 return AttributeString;
3479 }
3480
3481
3482
3484 if (Node->isRemoved())
3485 return true;
3486
3487
3492 return false;
3493 }
3494
3495private:
3496 static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
3497 std::string IdString = "ContextIds:";
3498 if (ContextIds.size() < 100) {
3499 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3500 std::sort(SortedIds.begin(), SortedIds.end());
3501 for (auto Id : SortedIds)
3502 IdString += (" " + Twine(Id)).str();
3503 } else {
3504 IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
3505 }
3506 return IdString;
3507 }
3508
3509 static std::string getColor(uint8_t AllocTypes, bool Highlight) {
3510
3511
3512
3513
3514
3515 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3516
3517 return || Highlight ? "brown1" : "lightpink";
3518 if (AllocTypes == (uint8_t)AllocationType::Cold)
3519 return || Highlight ? "cyan" : "lightskyblue";
3520 if (AllocTypes ==
3521 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3522 return Highlight ? "magenta" : "mediumorchid1";
3523 return "gray";
3524 }
3525
3526 static std::string getNodeId(NodeRef Node) {
3527 std::stringstream SStream;
3528 SStream << std::hex << "N0x" << (unsigned long long)Node;
3529 std::string Result = SStream.str();
3531 }
3532
3533
3534
3536};
3537
// Out-of-class definition of the DOTGraphTraits static flag controlling
// whether dot output highlights nodes/edges matching the requested context
// ids; defaults to off.
3538template <typename DerivedCCG, typename FuncTy, typename CallTy>
3539bool DOTGraphTraits<
3540 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>::DoHighlight =
3541 false;
3542
// Writes the graph to a dot file tagged with \p Label.
// NOTE(review): the function body (orig 3546-3547, presumably the WriteGraph
// call) was elided by extraction — verify upstream before applying.
3543template <typename DerivedCCG, typename FuncTy, typename CallTy>
3544void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3545 std::string Label) const {
3548}
3549
// Creates a new clone of Edge's callee node (registering it on the original
// node and copying MatchingCalls), then moves Edge (restricted to
// ContextIdsToMove, or all of its ids when empty) onto the clone via
// moveEdgeToExistingCalleeClone. Returns the new clone.
// NOTE(review): the shared_ptr template argument on the Edge parameter (orig
// 3553) was elided by extraction — verify upstream before applying.
3550template <typename DerivedCCG, typename FuncTy, typename CallTy>
3551typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3552CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3553 const std::shared_ptr &Edge,
3554 DenseSet<uint32_t> ContextIdsToMove) {
3555 ContextNode *Node = Edge->Callee;
3556 assert(NodeToCallingFunc.count(Node));
 // The clone shares the original's allocation flag, function and call.
3557 ContextNode *Clone =
3558 createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);
3559 Node->addClone(Clone);
3560 Clone->MatchingCalls = Node->MatchingCalls;
3561 moveEdgeToExistingCalleeClone(Edge, Clone, /*NewClone=*/true,
3562 ContextIdsToMove);
3563 return Clone;
3564}
3565
3566template <typename DerivedCCG, typename FuncTy, typename CallTy>
3567void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3568 moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge,
3569 ContextNode *NewCallee, bool NewClone,
3570 DenseSet<uint32_t> ContextIdsToMove) {
3571
3572
3573 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3574
3575 bool EdgeIsRecursive = Edge->Callee == Edge->Caller;
3576
3577 ContextNode *OldCallee = Edge->Callee;
3578
3579
3580
3581 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
3582
3583
3584
3585 if (ContextIdsToMove.empty())
3586 ContextIdsToMove = Edge->getContextIds();
3587
3588
3589
3590 if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
3591
3592
3593 NewCallee->AllocTypes |= Edge->AllocTypes;
3594
3595 if (ExistingEdgeToNewCallee) {
3596
3597
3598 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3599 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3600 assert(Edge->ContextIds == ContextIdsToMove);
3601 removeEdgeFromGraph(Edge.get());
3602 } else {
3603
3604 Edge->Callee = NewCallee;
3605 NewCallee->CallerEdges.push_back(Edge);
3606
3607 OldCallee->eraseCallerEdge(Edge.get());
3608
3609
3610 }
3611 } else {
3612
3613
3614 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3615 if (ExistingEdgeToNewCallee) {
3616
3617
3618 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3619 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3620 } else {
3621
3622 auto NewEdge = std::make_shared(
3623 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3624 Edge->Caller->CalleeEdges.push_back(NewEdge);
3625 NewCallee->CallerEdges.push_back(NewEdge);
3626 }
3627
3628
3629 NewCallee->AllocTypes |= CallerEdgeAllocType;
3631 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
3632 }
3633
3634
3635
3636 for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3637 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3638
3639
3640
3641 if (CalleeToUse == OldCallee) {
3642
3643
3644
3645 if (EdgeIsRecursive) {
3647 continue;
3648 }
3649 CalleeToUse = NewCallee;
3650 }
3651
3652
3653 DenseSet<uint32_t> EdgeContextIdsToMove =
3654 set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
3655 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3656 OldCalleeEdge->AllocTypes =
3657 computeAllocType(OldCalleeEdge->getContextIds());
3658 if (!NewClone) {
3659
3660
3661
3662
3663
3664 if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3665 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3666 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3667 continue;
3668 }
3669 }
3670 auto NewEdge = std::make_shared(
3671 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3672 EdgeContextIdsToMove);
3673 NewCallee->CalleeEdges.push_back(NewEdge);
3674 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3675 }
3676
3677
3678 OldCallee->AllocTypes = OldCallee->computeAllocType();
3679
3680 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3681 OldCallee->emptyContextIds());
3685 for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3687 false);
3688 for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3690 false);
3691 }
3692}
3693
3694template <typename DerivedCCG, typename FuncTy, typename CallTy>
3695void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3696 moveCalleeEdgeToNewCaller(const std::shared_ptr &Edge,
3697 ContextNode *NewCaller) {
3698 auto *OldCallee = Edge->Callee;
3699 auto *NewCallee = OldCallee;
3700
3701
3702 bool Recursive = Edge->Caller == Edge->Callee;
3703 if (Recursive)
3704 NewCallee = NewCaller;
3705
3706 ContextNode *OldCaller = Edge->Caller;
3707 OldCaller->eraseCalleeEdge(Edge.get());
3708
3709
3710
3711 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3712
3713 if (ExistingEdgeToNewCaller) {
3714
3715
3716 ExistingEdgeToNewCaller->getContextIds().insert_range(
3717 Edge->getContextIds());
3718 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3719 Edge->ContextIds.clear();
3720 Edge->AllocTypes = (uint8_t)AllocationType::None;
3721 OldCallee->eraseCallerEdge(Edge.get());
3722 } else {
3723
3724 Edge->Caller = NewCaller;
3725 NewCaller->CalleeEdges.push_back(Edge);
3726 if (Recursive) {
3727 assert(NewCallee == NewCaller);
3728
3729
3730 Edge->Callee = NewCallee;
3731 NewCallee->CallerEdges.push_back(Edge);
3732 OldCallee->eraseCallerEdge(Edge.get());
3733 }
3734
3735
3736 }
3737
3738 NewCaller->AllocTypes |= Edge->AllocTypes;
3739
3740
3741
3742
3743
3744#ifndef NDEBUG
3745 bool IsNewNode = NewCaller->CallerEdges.empty();
3746#endif
3747
3748
3749
3750
3751
3752
3753 if (!Recursive) {
3754 for (auto &OldCallerEdge : OldCaller->CallerEdges) {
3755 auto OldCallerCaller = OldCallerEdge->Caller;
3756
3757
3759 OldCallerEdge->getContextIds(), Edge->getContextIds());
3760 if (OldCaller == OldCallerCaller) {
3761 OldCallerCaller = NewCaller;
3762
3763
3764
3765 continue;
3766 }
3767 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3768 OldCallerEdge->AllocTypes =
3769 computeAllocType(OldCallerEdge->getContextIds());
3770
3771
3772
3773
3774 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3775
3776
3778 if (ExistingCallerEdge) {
3779 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3780 ExistingCallerEdge->AllocTypes |=
3781 computeAllocType(EdgeContextIdsToMove);
3782 continue;
3783 }
3784 auto NewEdge = std::make_shared(
3785 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3786 EdgeContextIdsToMove);
3787 NewCaller->CallerEdges.push_back(NewEdge);
3788 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3789 }
3790 }
3791
3792
3793 OldCaller->AllocTypes = OldCaller->computeAllocType();
3794
3795 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3796 OldCaller->emptyContextIds());
3800 for (const auto &OldCallerEdge : OldCaller->CallerEdges)
3802 false);
3803 for (const auto &NewCallerEdge : NewCaller->CallerEdges)
3805 false);
3806 }
3807}
3808
// Walks up the graph from \p Node (and through its clones), removing callee
// edges whose alloc type is None; Visited guards against re-processing.
// NOTE(review): the Visited-insert guard (orig 3813-3814) and the assert
// inside the removed-edge branch (3828) were elided by extraction.
3809template <typename DerivedCCG, typename FuncTy, typename CallTy>
3810void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3811 recursivelyRemoveNoneTypeCalleeEdges(
3812 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
 // (Elided) early return when Node was already visited.
3815 return;
3816
3817 removeNoneTypeCalleeEdges(Node);
3818
3819 for (auto *Clone : Node->Clones)
3820 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3821
 // Iterate over a copy since recursion may remove caller edges.
3822
3823
3824 auto CallerEdges = Node->CallerEdges;
3825 for (auto &Edge : CallerEdges) {
 // Skip edges removed during earlier recursion.
3826
3827 if (Edge->isRemoved()) {
3829 continue;
3830 }
3831 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
3832 }
3833}
3834
3835
// Entry point for backedge marking: runs a DFS from each live non-allocation
// callsite node that has no callers (i.e. a DFS root), marking callee edges
// that close a cycle. Visited persists across roots; CurrentStack tracks the
// active DFS path.
// NOTE(review): the guarding early-return condition (orig 3840), the
// Node extraction from Entry (3845), the negation in the CallerEdges check
// (3849), and a trailing assert (3852) were elided by extraction.
3836template <typename DerivedCCG, typename FuncTy, typename CallTy>
3837void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3838
3839
3841 return;
3842 DenseSet<const ContextNode *> Visited;
3843 DenseSet<const ContextNode *> CurrentStack;
3844 for (auto &Entry : NonAllocationCallToContextNodeMap) {
3846 if (Node->isRemoved())
3847 continue;
 // Only start DFS from root nodes (no caller edges); the operand of this
 // condition was elided by extraction.
3848
3849 if (->CallerEdges.empty())
3850 continue;
3851 markBackedges(Node, Visited, CurrentStack);
3853 }
3854}
3855
3856
// Recursive DFS used by markBackedges(): a callee edge whose target is both
// already visited and on the current DFS stack closes a cycle, so it is
// flagged IsBackedge.
// NOTE(review): the assert on the insert result (orig 3863) was elided by
// extraction.
3857template <typename DerivedCCG, typename FuncTy, typename CallTy>
3858void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3859 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3860 DenseSet<const ContextNode *> &CurrentStack) {
3861 auto I = Visited.insert(Node);
3862
 // Silence unused-variable warnings in release builds.
3864 (void)I;
3865 for (auto &CalleeEdge : Node->CalleeEdges) {
3866 auto *Callee = CalleeEdge->Callee;
3867 if (Visited.count(Callee)) {
 // Visited and still on the active path => this edge is a backedge;
 // visited but off the path is just a cross edge.
3868
3869
3870 if (CurrentStack.count(Callee))
3871 CalleeEdge->IsBackedge = true;
3872 continue;
3873 }
3874 CurrentStack.insert(Callee);
3875 markBackedges(Callee, Visited, CurrentStack);
3876 CurrentStack.erase(Callee);
3877 }
3878}
3879
// Entry point for clone identification: runs the recursive identifyClones
// from each allocation node (with a fresh Visited set and that allocation's
// context ids), then strips None-type callee edges left behind.
// NOTE(review): the guard in front of check() (orig 3890, presumably a
// VerifyCCG-style flag) was elided by extraction — verify upstream.
3880template <typename DerivedCCG, typename FuncTy, typename CallTy>
3881void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3882 DenseSet<const ContextNode *> Visited;
3883 for (auto &Entry : AllocationCallToContextNodeMap) {
3884 Visited.clear();
3885 identifyClones(Entry.second, Visited, Entry.second->getContextIds());
3886 }
3887 Visited.clear();
3888 for (auto &Entry : AllocationCallToContextNodeMap)
3889 recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
3891 check();
3892}
3893
3894
3901
3902template <typename DerivedCCG, typename FuncTy, typename CallTy>
3903void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3904 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3905 const DenseSet<uint32_t> &AllocContextIds) {
3909
3910
3911
3912
3913
3914
3915 if (->hasCall())
3916 return;
3917
3918
3920 return;
3921
3922#ifndef NDEBUG
3924#endif
3925 Visited.insert(Node);
3926
3928
3929
3930
3931
3932
3933 {
3934 auto CallerEdges = Node->CallerEdges;
3935 for (auto &Edge : CallerEdges) {
3936
3937 if (Edge->isRemoved()) {
3939 continue;
3940 }
3941
3942
3943 if (Edge->IsBackedge) {
3944
3945
3947 continue;
3948 }
3949
3950 if (!Visited.count(Edge->Caller) && ->Caller->CloneOf) {
3951 identifyClones(Edge->Caller, Visited, AllocContextIds);
3952 }
3953 }
3954 }
3955
3956
3958 return;
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3975 1,
3976 2};
3978 [&](const std::shared_ptr &A,
3979 const std::shared_ptr &B) {
3980
3981
3982 if (A->ContextIds.empty())
3983
3984
3985
3986
3987 return false;
3988 if (B->ContextIds.empty())
3989 return true;
3990
3991 if (A->AllocTypes == B->AllocTypes)
3992
3993
3994 return *A->ContextIds.begin() < *B->ContextIds.begin();
3995 return AllocTypeCloningPriority[A->AllocTypes] <
3996 AllocTypeCloningPriority[B->AllocTypes];
3997 });
3998
3999 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
4000
4001 DenseSet<uint32_t> RecursiveContextIds;
4003
4004
4006 DenseSet<uint32_t> AllCallerContextIds;
4007 for (auto &CE : Node->CallerEdges) {
4008
4009
4010 AllCallerContextIds.reserve(CE->getContextIds().size());
4011 for (auto Id : CE->getContextIds())
4012 if (!AllCallerContextIds.insert(Id).second)
4013 RecursiveContextIds.insert(Id);
4014 }
4015 }
4016
4017
4018
4019
4020
4021
4022
4023 auto CallerEdges = Node->CallerEdges;
4024 for (auto &CallerEdge : CallerEdges) {
4025
4026 if (CallerEdge->isRemoved()) {
4028 continue;
4029 }
4030 assert(CallerEdge->Callee == Node);
4031
4032
4033
4035 break;
4036
4037
4038
4039 if (!CallerEdge->Caller->hasCall())
4040 continue;
4041
4042
4043
4044 auto CallerEdgeContextsForAlloc =
4045 set_intersection(CallerEdge->getContextIds(), AllocContextIds);
4046 if (!RecursiveContextIds.empty())
4047 CallerEdgeContextsForAlloc =
4048 set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
4049 if (CallerEdgeContextsForAlloc.empty())
4050 continue;
4051
4052 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4053
4054
4055
4056 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4057 CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
4058 for (auto &CalleeEdge : Node->CalleeEdges)
4059 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4060 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4077 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
4078 if (!CallerEdge->IsBackedge &&
4079 allocTypeToUse(CallerAllocTypeForAlloc) ==
4080 allocTypeToUse(Node->AllocTypes) &&
4081 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4082 CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
4083 continue;
4084 }
4085
4086 if (CallerEdge->IsBackedge) {
4087
4088
4090 DeferredBackedges++;
4091 }
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4104 !Visited.count(CallerEdge->Caller)) {
4105 const auto OrigIdCount = CallerEdge->getContextIds().size();
4106
4107
4108 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4109 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4110
4111
4112
4113 bool UpdatedEdge = false;
4114 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4115 for (auto E : Node->CallerEdges) {
4116
4117 if (E->Caller->CloneOf != CallerEdge->Caller)
4118 continue;
4119
4120
4121 auto CallerEdgeContextsForAllocNew =
4122 set_intersection(CallerEdgeContextsForAlloc, E->getContextIds());
4123 if (CallerEdgeContextsForAllocNew.empty())
4124 continue;
4125
4126
4127
4129 continue;
4130
4131
4132
4133 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4134 CallerEdge = E;
4135 UpdatedEdge = true;
4136 break;
4137 }
4138 }
4139
4140
4141
4142
4143 if (CallerEdge->isRemoved())
4144 continue;
4145
4146
4147
4148
4149
4150
4151 if (!UpdatedEdge) {
4153 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4154 if (CallerEdgeContextsForAlloc.empty())
4155 continue;
4156 }
4157
4158
4159 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4160 CalleeEdgeAllocTypesForCallerEdge.clear();
4161 for (auto &CalleeEdge : Node->CalleeEdges) {
4162 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4163 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4164 }
4165 }
4166
4167
4168
4169 ContextNode *Clone = nullptr;
4170 for (auto *CurClone : Node->Clones) {
4171 if (allocTypeToUse(CurClone->AllocTypes) !=
4172 allocTypeToUse(CallerAllocTypeForAlloc))
4173 continue;
4174
4177
4178
4179 assert(!BothSingleAlloc ||
4180 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4181
4182
4183
4184
4185
4186 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4187 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4188 Clone = CurClone;
4189 break;
4190 }
4191 }
4192
4193
4194 if (Clone)
4195 moveEdgeToExistingCalleeClone(CallerEdge, Clone, false,
4196 CallerEdgeContextsForAlloc);
4197 else
4198 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4199
4200
4201 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4202 }
4203
4204
4206
4207
4208 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
4209
4212}
4213
4214void ModuleCallsiteContextGraph::updateAllocationCall(
4219 "memprof", AllocTypeString);
4222 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
4223 << ore::NV("AllocationCall", Call.call()) << " in clone "
4225 << " marked with memprof allocation attribute "
4226 << ore::NV("Attribute", AllocTypeString));
4227}
4228
4229void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
4233 assert(AI->Versions.size() > Call.cloneNo());
4234 AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
4235}
4236
4238ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
4240 if (!CB->getAttributes().hasFnAttr("memprof"))
4241 return AllocationType::None;
4242 return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
4243 ? AllocationType::Cold
4244 : AllocationType::NotCold;
4245}
4246
4248IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
4250 assert(AI->Versions.size() > Call.cloneNo());
4252}
4253
4254void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4255 FuncInfo CalleeFunc) {
4256 auto *CurF = getCalleeFunc(CallerCall.call());
4257 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4259
4260
4261
4262
4264 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4265 LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
4266 << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4267 << "\n");
4268 MismatchedCloneAssignments++;
4269 }
4270 }
4271 if (NewCalleeCloneNo > 0)
4272 cast(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4273 OREGetter(CallerCall.call()->getFunction())
4274 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
4275 << ore::NV("Call", CallerCall.call()) << " in clone "
4276 << ore::NV("Caller", CallerCall.call()->getFunction())
4277 << " assigned to call function clone "
4278 << ore::NV("Callee", CalleeFunc.func()));
4279}
4280
4281void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4282 FuncInfo CalleeFunc) {
4285 "Caller cannot be an allocation which should not have profiled calls");
4286 assert(CI->Clones.size() > CallerCall.cloneNo());
4287 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4288 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4289
4290
4291
4292
4293 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4294 LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
4295 << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4296 << "\n");
4297 MismatchedCloneAssignments++;
4298 }
4299 CurCalleeCloneNo = NewCalleeCloneNo;
4300}
4301
4302
4303
4304
4308 if (!SP)
4309 return;
4311 SP->replaceLinkageName(MDName);
4313 if (!Decl)
4314 return;
4315 TempDISubprogram NewDecl = Decl->clone();
4316 NewDecl->replaceLinkageName(MDName);
4318}
4319
4320CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
4322ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4323 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
4324 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {
4325
4329 assert(.func()->getParent()->getFunction(Name));
4330 NewFunc->setName(Name);
4332 for (auto &Inst : CallsWithMetadataInFunc) {
4333
4334 assert(Inst.cloneNo() == 0);
4335 CallMap[Inst] = {cast(VMap[Inst.call()]), CloneNo};
4336 }
4337 OREGetter(Func.func())
4338 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
4339 << "created clone " << ore::NV("NewFunction", NewFunc));
4340 return {NewFunc, CloneNo};
4341}
4342
4343CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4344 IndexCall>::FuncInfo
4345IndexCallsiteContextGraph::cloneFunctionForCallsite(
4346 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
4347 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {
4348
4349
4350
4351
4355
4356
4357
4358
4359
4360
4361 for (auto &Inst : CallsWithMetadataInFunc) {
4362
4363 assert(Inst.cloneNo() == 0);
4365 assert(AI->Versions.size() == CloneNo);
4366
4367
4368 AI->Versions.push_back(0);
4369 } else {
4371 assert(CI && CI->Clones.size() == CloneNo);
4372
4373
4374 CI->Clones.push_back(0);
4375 }
4376 CallMap[Inst] = {Inst.call(), CloneNo};
4377 }
4378 return {Func.func(), CloneNo};
4379}
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395template <typename DerivedCCG, typename FuncTy, typename CallTy>
4396void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4398 return;
4399
4400
4401
4402 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4403 for (auto &Entry : AllocationCallToContextNodeMap) {
4405 for (auto Id : Node->getContextIds())
4406 ContextIdToAllocationNode[Id] = Node->getOrigNode();
4407 for (auto *Clone : Node->Clones) {
4408 for (auto Id : Clone->getContextIds())
4409 ContextIdToAllocationNode[Id] = Clone->getOrigNode();
4410 }
4411 }
4412
4413
4414
4415
4416 DenseSet<const ContextNode *> Visited;
4417 for (auto &Entry : AllocationCallToContextNodeMap) {
4419
4420 mergeClones(Node, Visited, ContextIdToAllocationNode);
4421
4422
4423
4424
4425
4426 auto Clones = Node->Clones;
4427 for (auto *Clone : Clones)
4428 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4429 }
4430
4432 dbgs() << "CCG after merging:\n";
4433 dbgs() << *this;
4434 }
4436 exportToDot("aftermerge");
4437
4439 check();
4440 }
4441}
4442
4443
4444template <typename DerivedCCG, typename FuncTy, typename CallTy>
4445void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4446 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4447 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4450 return;
4451
4452
4453
4454
4455
4456
4457 bool FoundUnvisited = true;
4458 unsigned Iters = 0;
4459 while (FoundUnvisited) {
4460 Iters++;
4461 FoundUnvisited = false;
4462
4463
4464 auto CallerEdges = Node->CallerEdges;
4465 for (auto CallerEdge : CallerEdges) {
4466
4467 if (CallerEdge->Callee != Node)
4468 continue;
4469
4470
4472 FoundUnvisited = true;
4473 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4474 }
4475 }
4476
4477 TotalMergeInvokes++;
4478 TotalMergeIters += Iters;
4479 if (Iters > MaxMergeIters)
4480 MaxMergeIters = Iters;
4481
4482
4483 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4484}
4485
4486template <typename DerivedCCG, typename FuncTy, typename CallTy>
4487void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4488 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4489 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4490
4491 if (Node->emptyContextIds())
4492 return;
4493
4494
4495
4496 MapVector<ContextNode *, std::vector<std::shared_ptr>>
4497 OrigNodeToCloneEdges;
4498 for (const auto &E : Node->CalleeEdges) {
4499 auto *Callee = E->Callee;
4500 if (->CloneOf && Callee->Clones.empty())
4501 continue;
4502 ContextNode *Base = Callee->getOrigNode();
4503 OrigNodeToCloneEdges[Base].push_back(E);
4504 }
4505
4506
4507
4508
4509 auto CalleeCallerEdgeLessThan = [](const std::shared_ptr &A,
4510 const std::shared_ptr &B) {
4511 if (A->Callee->CallerEdges.size() != B->Callee->CallerEdges.size())
4512 return A->Callee->CallerEdges.size() < B->Callee->CallerEdges.size();
4513 if (A->Callee->CloneOf && ->Callee->CloneOf)
4514 return true;
4515 else if (->Callee->CloneOf && B->Callee->CloneOf)
4516 return false;
4517
4518
4519 return *A->ContextIds.begin() < *B->ContextIds.begin();
4520 };
4521
4522
4523
4524 for (auto Entry : OrigNodeToCloneEdges) {
4525
4526
4527 auto &CalleeEdges = Entry.second;
4528 auto NumCalleeClones = CalleeEdges.size();
4529
4530 if (NumCalleeClones == 1)
4531 continue;
4532
4533
4534
4535
4537
4538
4539
4540
4541 DenseSet<ContextNode *> OtherCallersToShareMerge;
4542 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4543 OtherCallersToShareMerge);
4544
4545
4546
4547
4548 ContextNode *MergeNode = nullptr;
4549 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4550 for (auto CalleeEdge : CalleeEdges) {
4551 auto *OrigCallee = CalleeEdge->Callee;
4552
4553
4554
4555 if (!MergeNode) {
4556
4557 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4558 MergeNode = OrigCallee;
4559 NonNewMergedNodes++;
4560 continue;
4561 }
4562
4563
4564
4565
4566 if (!OtherCallersToShareMerge.empty()) {
4567 bool MoveAllCallerEdges = true;
4568 for (auto CalleeCallerE : OrigCallee->CallerEdges) {
4569 if (CalleeCallerE == CalleeEdge)
4570 continue;
4571 if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) {
4572 MoveAllCallerEdges = false;
4573 break;
4574 }
4575 }
4576
4577
4578 if (MoveAllCallerEdges) {
4579 MergeNode = OrigCallee;
4580 NonNewMergedNodes++;
4581 continue;
4582 }
4583 }
4584 }
4585
4586 if (MergeNode) {
4587 assert(MergeNode != OrigCallee);
4588 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4589 false);
4590 } else {
4591 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4592 NewMergedNodes++;
4593 }
4594
4595
4596 if (!OtherCallersToShareMerge.empty()) {
4597
4598
4599
4600 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4601 for (auto &CalleeCallerE : OrigCalleeCallerEdges) {
4602 if (CalleeCallerE == CalleeEdge)
4603 continue;
4604 if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller))
4605 continue;
4606 CallerToMoveCount[CalleeCallerE->Caller]++;
4607 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4608 false);
4609 }
4610 }
4611 removeNoneTypeCalleeEdges(OrigCallee);
4612 removeNoneTypeCalleeEdges(MergeNode);
4613 }
4614 }
4615}
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630template <typename DerivedCCG, typename FuncTy, typename CallTy>
4631void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4632 findOtherCallersToShareMerge(
4633 ContextNode *Node,
4634 std::vector<std::shared_ptr> &CalleeEdges,
4635 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4636 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4637 auto NumCalleeClones = CalleeEdges.size();
4638
4639
4640 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4641
4642
4643 unsigned PossibleOtherCallerNodes = 0;
4644
4645
4646
4647 if (CalleeEdges[0]->Callee->CallerEdges.size() < 2)
4648 return;
4649
4650
4651
4652
4653 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4654 for (auto CalleeEdge : CalleeEdges) {
4655 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4656
4657
4658 for (auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4659 if (CalleeCallerEdges->Caller == Node) {
4660 assert(CalleeCallerEdges == CalleeEdge);
4661 continue;
4662 }
4663 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4664
4665
4666 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4667 NumCalleeClones)
4668 PossibleOtherCallerNodes++;
4669 }
4670
4671
4672 for (auto Id : CalleeEdge->getContextIds()) {
4673 auto *Alloc = ContextIdToAllocationNode.lookup(Id);
4675
4676
4677 MissingAllocForContextId++;
4678 continue;
4679 }
4680 CalleeEdgeToAllocNodes[CalleeEdge.get()].insert(Alloc);
4681 }
4682 }
4683
4684
4685
4686
4687 for (auto CalleeEdge : CalleeEdges) {
4688 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4689
4690 if (!PossibleOtherCallerNodes)
4691 break;
4692 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4693
4694 for (auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4695
4696 if (CalleeCallerE == CalleeEdge)
4697 continue;
4698
4699
4700 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4701 NumCalleeClones)
4702 continue;
4703
4704
4705 for (auto Id : CalleeCallerE->getContextIds()) {
4706 auto *Alloc = ContextIdToAllocationNode.lookup(Id);
4708 continue;
4709
4710
4711 if (!CurCalleeAllocNodes.contains(Alloc)) {
4712 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4713 PossibleOtherCallerNodes--;
4714 break;
4715 }
4716 }
4717 }
4718 }
4719
4720 if (!PossibleOtherCallerNodes)
4721 return;
4722
4723
4724
4725 for (auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) {
4726 if (Count != NumCalleeClones)
4727 continue;
4728 OtherCallersToShareMerge.insert(OtherCaller);
4729 }
4730}
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773template <typename DerivedCCG, typename FuncTy, typename CallTy>
4774bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4776
4777 mergeClones();
4778
4779
4780
4781 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4782
4783
4784
4785 auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
4786 const FuncInfo &CalleeFunc) {
4788 CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
4789 };
4790
4791
4792 struct FuncCloneInfo {
4793
4794 FuncInfo FuncClone;
4795
4796
4797 DenseMap<CallInfo, CallInfo> CallMap;
4798 };
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4826 UnassignedCallClones;
4827
4828
4829
4830 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4831 FuncInfo OrigFunc(Func);
4832
4833
4834
4835
4836 std::vector FuncCloneInfos;
4837 for (auto &Call : CallsWithMetadata) {
4838 ContextNode *Node = getNodeForInst(Call);
4839
4840
4841
4842 if (!Node || Node->Clones.empty())
4843 continue;
4845 "Not having a call should have prevented cloning");
4846
4847
4848
4849 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4850
4851
4852
4853 auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
4854 CallInfo &Call,
4855 ContextNode *CallsiteClone,
4856 bool IsAlloc) {
4857
4858 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4859
4860 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4861 DenseMap<CallInfo, CallInfo> &CallMap =
4862 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4863 CallInfo CallClone(Call);
4864 if (auto It = CallMap.find(Call); It != CallMap.end())
4865 CallClone = It->second;
4866 CallsiteClone->setCall(CallClone);
4867
4868 for (auto &MatchingCall : Node->MatchingCalls) {
4869 CallInfo CallClone(MatchingCall);
4870 if (auto It = CallMap.find(MatchingCall); It != CallMap.end())
4871 CallClone = It->second;
4872
4873 MatchingCall = CallClone;
4874 }
4875 };
4876
4877
4878
4879
4880
4881 auto MoveEdgeToNewCalleeCloneAndSetUp =
4882 [&](const std::shared_ptr &Edge) {
4883 ContextNode *OrigCallee = Edge->Callee;
4884 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
4885 removeNoneTypeCalleeEdges(NewClone);
4886 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4887
4888
4889
4890 if (CallsiteToCalleeFuncCloneMap.count(OrigCallee))
4891 RecordCalleeFuncOfCallsite(
4892 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4893 return NewClone;
4894 };
4895
4896
4897
4898
4899 std::deque<ContextNode *> ClonesWorklist;
4900
4901 if (->emptyContextIds())
4902 ClonesWorklist.push_back(Node);
4904
4905
4906
4907
4908 unsigned NodeCloneCount = 0;
4909 while (!ClonesWorklist.empty()) {
4910 ContextNode *Clone = ClonesWorklist.front();
4911 ClonesWorklist.pop_front();
4912 NodeCloneCount++;
4915
4916
4917
4918
4919
4920 if (FuncCloneInfos.size() < NodeCloneCount) {
4921
4922 if (NodeCloneCount == 1) {
4923
4924
4925
4927 Clone->CallerEdges, [&](const std::shared_ptr &E) {
4928 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4929 }));
4930
4931
4932 FuncCloneInfos.push_back(
4933 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4934 AssignCallsiteCloneToFuncClone(
4935 OrigFunc, Call, Clone,
4936 AllocationCallToContextNodeMap.count(Call));
4937 for (auto &CE : Clone->CallerEdges) {
4938
4939 if (->Caller->hasCall())
4940 continue;
4941 RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
4942 }
4943 continue;
4944 }
4945
4946
4947
4948
4949
4950
4951 FuncInfo PreviousAssignedFuncClone;
4953 Clone->CallerEdges, [&](const std::shared_ptr &E) {
4954 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4955 });
4956 bool CallerAssignedToCloneOfFunc = false;
4957 if (EI != Clone->CallerEdges.end()) {
4958 const std::shared_ptr &Edge = *EI;
4959 PreviousAssignedFuncClone =
4960 CallsiteToCalleeFuncCloneMap[Edge->Caller];
4961 CallerAssignedToCloneOfFunc = true;
4962 }
4963
4964
4965
4966 DenseMap<CallInfo, CallInfo> NewCallMap;
4967 unsigned CloneNo = FuncCloneInfos.size();
4968 assert(CloneNo > 0 && "Clone 0 is the original function, which "
4969 "should already exist in the map");
4970 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4971 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
4972 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4973 FunctionClonesAnalysis++;
4975
4976
4977
4978
4979 if (!CallerAssignedToCloneOfFunc) {
4980 AssignCallsiteCloneToFuncClone(
4981 NewFuncClone, Call, Clone,
4982 AllocationCallToContextNodeMap.count(Call));
4983 for (auto &CE : Clone->CallerEdges) {
4984
4985 if (->Caller->hasCall())
4986 continue;
4987 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
4988 }
4989 continue;
4990 }
4991
4992
4993
4994
4995
4996
4997
4998
4999 auto CallerEdges = Clone->CallerEdges;
5000 for (auto CE : CallerEdges) {
5001
5002 if (CE->isRemoved()) {
5004 continue;
5005 }
5007
5008 if (->Caller->hasCall())
5009 continue;
5010
5011 if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
5012
5013
5014
5015 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
5016 PreviousAssignedFuncClone)
5017 continue;
5018
5019 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032 auto CalleeEdges = CE->Caller->CalleeEdges;
5033 for (auto CalleeEdge : CalleeEdges) {
5034
5035
5036 if (CalleeEdge->isRemoved()) {
5038 continue;
5039 }
5041 ContextNode *Callee = CalleeEdge->Callee;
5042
5043
5044
5045 if (Callee == Clone || ->hasCall())
5046 continue;
5047
5048
5049
5050 if (Callee == CalleeEdge->Caller)
5051 continue;
5052 ContextNode *NewClone =
5053 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5054
5055
5056 removeNoneTypeCalleeEdges(Callee);
5057
5058
5059
5060
5061
5062
5063
5064 CallInfo OrigCall(Callee->getOrigNode()->Call);
5065 OrigCall.setCloneNo(0);
5066 DenseMap<CallInfo, CallInfo> &CallMap =
5067 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5069 CallInfo NewCall(CallMap[OrigCall]);
5071 NewClone->setCall(NewCall);
5072
5073 for (auto &MatchingCall : NewClone->MatchingCalls) {
5074 CallInfo OrigMatchingCall(MatchingCall);
5075 OrigMatchingCall.setCloneNo(0);
5076 assert(CallMap.count(OrigMatchingCall));
5077 CallInfo NewCall(CallMap[OrigMatchingCall]);
5079
5080 MatchingCall = NewCall;
5081 }
5082 }
5083 }
5084
5085
5086
5087 }
5088
5089 auto FindFirstAvailFuncClone = [&]() {
5090
5091
5092
5093
5094 for (auto &CF : FuncCloneInfos) {
5095 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5096 return CF.FuncClone;
5097 }
5099 "Expected an available func clone for this callsite clone");
5100 };
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5117 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5118
5119
5120
5121 auto CloneCallerEdges = Clone->CallerEdges;
5122 for (auto &Edge : CloneCallerEdges) {
5123
5124
5125
5126 if (Edge->isRemoved())
5127 continue;
5128
5129 if (->Caller->hasCall())
5130 continue;
5131
5132
5133 if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
5134 FuncInfo FuncCloneCalledByCaller =
5135 CallsiteToCalleeFuncCloneMap[Edge->Caller];
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5146 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5147 Clone) ||
5148
5149
5150
5151
5152
5153
5154 (FuncCloneAssignedToCurCallsiteClone &&
5155 FuncCloneAssignedToCurCallsiteClone !=
5156 FuncCloneCalledByCaller)) {
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171 if (FuncCloneToNewCallsiteCloneMap.count(
5172 FuncCloneCalledByCaller)) {
5173 ContextNode *NewClone =
5174 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5175 moveEdgeToExistingCalleeClone(Edge, NewClone);
5176
5177 removeNoneTypeCalleeEdges(NewClone);
5178 } else {
5179
5180 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(Edge);
5181 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5182 NewClone;
5183
5184 ClonesWorklist.push_back(NewClone);
5185 }
5186
5187
5188 removeNoneTypeCalleeEdges(Clone);
5189
5190
5191 continue;
5192 }
5193
5194
5195
5196 if (!FuncCloneAssignedToCurCallsiteClone) {
5197 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5198
5199 AssignCallsiteCloneToFuncClone(
5200 FuncCloneCalledByCaller, Call, Clone,
5201 AllocationCallToContextNodeMap.count(Call));
5202 } else
5203
5204
5205 assert(FuncCloneAssignedToCurCallsiteClone ==
5206 FuncCloneCalledByCaller);
5207
5208 } else {
5209
5210
5211
5212
5213
5214
5215 if (!FuncCloneAssignedToCurCallsiteClone) {
5216 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5217 assert(FuncCloneAssignedToCurCallsiteClone);
5218
5219 AssignCallsiteCloneToFuncClone(
5220 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
5221 AllocationCallToContextNodeMap.count(Call));
5222 } else
5223 assert(FuncCloneToCurNodeCloneMap
5224 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5225
5226 RecordCalleeFuncOfCallsite(Edge->Caller,
5227 FuncCloneAssignedToCurCallsiteClone);
5228 }
5229 }
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247 if (!FuncCloneAssignedToCurCallsiteClone) {
5248 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5249 assert(FuncCloneAssignedToCurCallsiteClone &&
5250 "No available func clone for this callsite clone");
5251 AssignCallsiteCloneToFuncClone(
5252 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
5253 AllocationCallToContextNodeMap.contains(Call));
5254 }
5255 }
5258 for (const auto &PE : Node->CalleeEdges)
5260 for (const auto &CE : Node->CallerEdges)
5262 for (auto *Clone : Node->Clones) {
5264 for (const auto &PE : Clone->CalleeEdges)
5266 for (const auto &CE : Clone->CallerEdges)
5268 }
5269 }
5270 }
5271
5272 if (FuncCloneInfos.size() < 2)
5273 continue;
5274
5275
5276
5277
5278 for (auto &Call : CallsWithMetadata) {
5279 ContextNode *Node = getNodeForInst(Call);
5280 if (!Node || ->hasCall() || Node->emptyContextIds())
5281 continue;
5282
5283
5284
5285
5286 if (Node->Clones.size() + 1 >= FuncCloneInfos.size())
5287 continue;
5288
5289
5290 DenseSet NodeCallClones;
5291 for (auto *C : Node->Clones)
5292 NodeCallClones.insert(C->Call.cloneNo());
5293 unsigned I = 0;
5294
5295 for (auto &FC : FuncCloneInfos) {
5296
5297 assert(FC.FuncClone.cloneNo() == I);
5298
5299
5300 if (++I == 1 || NodeCallClones.contains(I)) {
5301 continue;
5302 }
5303
5304
5305 auto &CallVector = UnassignedCallClones[Node][I];
5306 DenseMap<CallInfo, CallInfo> &CallMap = FC.CallMap;
5307 if (auto It = CallMap.find(Call); It != CallMap.end()) {
5308 CallInfo CallClone = It->second;
5309 CallVector.push_back(CallClone);
5310 } else {
5311
5312
5313 assert(false && "Expected to find call in CallMap");
5314 }
5315
5316 for (auto &MatchingCall : Node->MatchingCalls) {
5317 if (auto It = CallMap.find(MatchingCall); It != CallMap.end()) {
5318 CallInfo CallClone = It->second;
5319 CallVector.push_back(CallClone);
5320 } else {
5321
5322
5323 assert(false && "Expected to find call in CallMap");
5324 }
5325 }
5326 }
5327 }
5328 }
5329
5330 uint8_t BothTypes =
5331 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5332
5333 auto UpdateCalls = [&](ContextNode *Node,
5334 DenseSet<const ContextNode *> &Visited,
5335 auto &&UpdateCalls) {
5336 auto Inserted = Visited.insert(Node);
5338 return;
5339
5340 for (auto *Clone : Node->Clones)
5341 UpdateCalls(Clone, Visited, UpdateCalls);
5342
5343 for (auto &Edge : Node->CallerEdges)
5344 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
5345
5346
5347
5348 if (->hasCall() || Node->emptyContextIds())
5349 return;
5350
5351 if (Node->IsAllocation) {
5352 auto AT = allocTypeToUse(Node->AllocTypes);
5353
5354
5355
5356
5358 !ContextIdToContextSizeInfos.empty()) {
5359 uint64_t TotalCold = 0;
5360 uint64_t Total = 0;
5361 for (auto Id : Node->getContextIds()) {
5362 auto TypeI = ContextIdToAllocationType.find(Id);
5363 assert(TypeI != ContextIdToAllocationType.end());
5364 auto CSI = ContextIdToContextSizeInfos.find(Id);
5365 if (CSI != ContextIdToContextSizeInfos.end()) {
5366 for (auto &Info : CSI->second) {
5368 if (TypeI->second == AllocationType::Cold)
5369 TotalCold += Info.TotalSize;
5370 }
5371 }
5372 }
5374 AT = AllocationType::Cold;
5375 }
5376 updateAllocationCall(Node->Call, AT);
5377 assert(Node->MatchingCalls.empty());
5378 return;
5379 }
5380
5381 if (!CallsiteToCalleeFuncCloneMap.count(Node))
5382 return;
5383
5384 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
5385 updateCall(Node->Call, CalleeFunc);
5386
5387 for (auto &Call : Node->MatchingCalls)
5388 updateCall(Call, CalleeFunc);
5389
5390
5391
5392 if (!UnassignedCallClones.contains(Node))
5393 return;
5394 DenseSet NodeCallClones;
5395 for (auto *C : Node->Clones)
5396 NodeCallClones.insert(C->Call.cloneNo());
5397
5398 auto &ClonedCalls = UnassignedCallClones[Node];
5399 for (auto &[CloneNo, CallVector] : ClonedCalls) {
5400
5401 assert(CloneNo > 0);
5402
5403 if (NodeCallClones.contains(CloneNo))
5404 continue;
5405
5406 for (auto &Call : CallVector)
5407 updateCall(Call, CalleeFunc);
5408 }
5409 };
5410
5411
5412
5413
5414
5415
5416 DenseSet<const ContextNode *> Visited;
5417 for (auto &Entry : AllocationCallToContextNodeMap)
5418 UpdateCalls(Entry.second, Visited, UpdateCalls);
5419
5421}
5422
5423
5424
5426 SHA1 Hasher;
5427
5428
5429 for (auto &SN : FS->callsites()) {
5430
5431
5432
5434 SN.Clones.size() > I &&
5435 "Callsite summary has fewer entries than other summaries in function");
5436 if (SN.Clones.size() <= I || !SN.Clones[I])
5437 continue;
5441 }
5442
5443 for (auto &AN : FS->allocs()) {
5444
5445
5446
5447 assert(AN.Versions.size() > I &&
5448 "Alloc summary has fewer entries than other summaries in function");
5449 if (AN.Versions.size() <= I ||
5451 continue;
5453 }
5455}
5456
5460 &FuncToAliasMap,
5463
5464
5466 NewGV->takeName(DeclGV);
5469 };
5470
5471
5472
5473 auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
5474 if (!FuncToAliasMap.count(&F))
5475 return;
5476 for (auto *A : FuncToAliasMap[&F]) {
5478 auto *PrevA = M.getNamedAlias(AliasName);
5480 A->getType()->getPointerAddressSpace(),
5481 A->getLinkage(), AliasName, NewF);
5482 NewA->copyAttributesFrom(A);
5483 if (PrevA)
5484 TakeDeclNameAndReplace(PrevA, NewA);
5485 }
5486 };
5487
5488
5489
5490 assert(NumClones > 1);
5492 VMaps.reserve(NumClones - 1);
5493 FunctionsClonedThinBackend++;
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5506
5507
5509
5510 for (unsigned I = 1; I < NumClones; I++) {
5511 VMaps.emplace_back(std::make_unique());
5514
5515
5516
5517 if (HashToFunc.contains(Hash)) {
5518 FunctionCloneDuplicatesThinBackend++;
5519 auto *Func = HashToFunc[Hash];
5520 if (Func->hasAvailableExternallyLinkage()) {
5521
5522
5523
5524
5525
5526 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5528 << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
5529 continue;
5530 }
5531 auto *PrevF = M.getFunction(Name);
5533 if (PrevF)
5534 TakeDeclNameAndReplace(PrevF, Alias);
5536 << "created clone alias " << ore::NV("Alias", Alias));
5537
5538
5539 CloneFuncAliases(Func, I);
5540 continue;
5541 }
5543 HashToFunc[Hash] = NewF;
5544 FunctionClonesThinBackend++;
5545
5546
5547 for (auto &BB : *NewF) {
5548 for (auto &Inst : BB) {
5549 Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
5550 Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
5551 }
5552 }
5554 if (PrevF)
5555 TakeDeclNameAndReplace(PrevF, NewF);
5556 else
5557 NewF->setName(Name);
5560 << "created clone " << ore::NV("NewFunction", NewF));
5561
5562
5563 CloneFuncAliases(NewF, I);
5564 }
5565 return VMaps;
5566}
5567
5568
5569
5572 const Function *CallingFunc = nullptr) {
5573
5574
5575
5577 if (!TheFnVI)
5578
5579
5580
5583 if (TheFnVI)
5584 return TheFnVI;
5585
5588
5589
5590
5591 auto SrcFileMD = F.getMetadata("thinlto_src_file");
5592
5593
5594
5595
5596
5597 if (!SrcFileMD && F.isDeclaration()) {
5598
5599
5600 assert(CallingFunc);
5601 SrcFileMD = CallingFunc->getMetadata("thinlto_src_file");
5602
5603
5604
5605
5606 assert(SrcFileMD || OrigName == F.getName());
5607 }
5608 StringRef SrcFile = M.getSourceFileName();
5609 if (SrcFileMD)
5610 SrcFile = dyn_cast(SrcFileMD->getOperand(0))->getString();
5615
5616
5617
5618
5619
5620 if (!TheFnVI && OrigName == F.getName() && F.hasLocalLinkage() &&
5621 F.getName().contains('.')) {
5622 OrigName = F.getName().rsplit('.').first;
5627 }
5628
5629
5630
5631 assert(TheFnVI || F.isDeclaration());
5632 return TheFnVI;
5633}
5634
5635bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5637 ICallAnalysis = std::make_unique();
5638 Symtab = std::make_unique();
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649 if (Error E = Symtab->create(M, true, false)) {
5650 std::string SymtabFailure = toString(std::move(E));
5651 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
5652 return false;
5653 }
5654 return true;
5655}
5656
5657#ifndef NDEBUG
5658
5659
5664 auto MIBIter = AllocNode.MIBs.begin();
5665 for (auto &MDOp : MemProfMD->operands()) {
5666 assert(MIBIter != AllocNode.MIBs.end());
5667 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5670 assert(StackMDNode);
5672 auto ContextIterBegin =
5674
5675 uint64_t LastStackContextId =
5676 (ContextIterBegin != StackContext.end() && *ContextIterBegin == 0) ? 1
5677 : 0;
5678 for (auto ContextIter = ContextIterBegin; ContextIter != StackContext.end();
5679 ++ContextIter) {
5680
5681
5682
5683 if (LastStackContextId == *ContextIter)
5684 continue;
5685 LastStackContextId = *ContextIter;
5686 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5688 *ContextIter);
5689 StackIdIndexIter++;
5690 }
5691 MIBIter++;
5692 }
5693}
5694#endif
5695
// Apply the cloning decisions recorded in the ThinLTO import summary
// (ImportSummary) back onto this module's IR: create the requested function
// clones, redirect calls inside each clone to the matching callee clone,
// attach "memprof" cold/notcold attributes to allocation calls, record and
// then perform indirect call promotion (ICP), and finally strip the
// memprof/callsite metadata that is no longer needed.
// Returns true if the module was changed (false only on early bail-out).
// NOTE(review): this listing was extracted with gaps — jumps in the embedded
// line numbers mark missing source lines. Comments below describe only what
// the visible code establishes.
5696bool MemProfContextDisambiguation::applyImport(Module &M) {
5697  assert(ImportSummary);
5699
5700
5701
5702
// Map each aliasee function to the set of aliases pointing at it, so that
// clones can later be created for the aliases as well.
// NOTE(review): the dyn_cast of Aliasee to Function (declaring F) is on an
// elided line between 5706 and 5708.
5703  std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5704      FuncToAliasMap;
5705  for (auto &A : M.aliases()) {
5706    auto *Aliasee = A.getAliaseeObject();
5708      FuncToAliasMap[F].insert(&A);
5709  }
5710
// Bail out early if ICP bookkeeping (symtab etc.) cannot be set up.
5711  if (!initializeIndirectCallPromotionInfo(M))
5712    return false;
5713
5714  for (auto &F : M) {
5716      continue;
5717
5718    OptimizationRemarkEmitter ORE(&F);
5719
// Lazily clone the current function the first time any callsite or
// allocation in it requires clones. VMaps (declared on an elided line)
// receives one ValueToValueMapTy per non-original clone.
5721    bool ClonesCreated = false;
5722    unsigned NumClonesCreated = 0;
5723    auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
5724
5725      assert(NumClones > 0);
5726
// A single version means only the original is needed — nothing to clone.
5727      if (NumClones == 1)
5728        return;
5729
5730
5731
5732
// All summary records for one function must agree on the clone count.
5733      if (ClonesCreated) {
5734        assert(NumClonesCreated == NumClones);
5735        return;
5736      }
5738
5739      assert(VMaps.size() == NumClones - 1);
5741      ClonesCreated = true;
5742      NumClonesCreated = NumClones;
5743    };
5744
// For a callsite with summary StackNode, clone the function if needed and
// point each clone's copy of CB at the corresponding callee clone.
5745    auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
5746                             Function *CalledFunction, FunctionSummary *FS) {
5747
5748      CloneFuncIfNeeded(StackNode.Clones.size(), FS);
5749
5751
5752
5753
5754
5755
5756
5757
5758
// Skip update if the IR callee no longer matches the profiled callee
// (GA is an alias check declared on an elided line).
5760      if (CalledFunction != CB->getCalledOperand() &&
5761          (!GA || CalledFunction != GA->getAliaseeObject())) {
5762        SkippedCallsCloning++;
5763        return;
5764      }
5765
5766
5767
5768      auto CalleeOrigName = CalledFunction->getName();
5769      for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
5770
// An empty VMap marks a clone that was elided as a duplicate.
5771
5772        if (J > 0 && VMaps[J - 1]->empty())
5773          continue;
5774
// Clone index 0 in the record means "keep calling the original".
5775
5776        if (!StackNode.Clones[J])
5777          continue;
// Get-or-create the callee clone declaration by its memprof clone name
// (name argument is on an elided line 5779).
5778        auto NewF = M.getOrInsertFunction(
5780            CalledFunction->getFunctionType());
5781        CallBase *CBClone;
5782
5783        if (!J)
5784          CBClone = CB;
5785        else
5787
5788
5789
5790
5791
5792
5794        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
5795                 << ore::NV("Call", CBClone) << " in clone "
5797                 << " assigned to call function clone "
5798                 << ore::NV("Callee", NewF.getCallee()));
5799      }
5800    };
5801
// Locate this function's summary (TheFnVI computed on elided lines).
5802
5804
5805
5806
5807
5808    if (!TheFnVI)
5809      continue;
5810
5811    auto *GVSummary =
5812        ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
5813    if (!GVSummary) {
5814
// Imported function: fall back to the summary from its source module,
// identified via the thinlto_src_module metadata.
5815
5816      auto SrcModuleMD = F.getMetadata("thinlto_src_module");
5817      assert(SrcModuleMD &&
5818             "enable-import-metadata is needed to emit thinlto_src_module");
5819      StringRef SrcModule =
5822        if (GVS->modulePath() == SrcModule) {
5823          GVSummary = GVS.get();
5824          break;
5825        }
5826      }
5827      assert(GVSummary && GVSummary->modulePath() == SrcModule);
5828    }
5829
5830
5831
5833      continue;
5834
// FS (the FunctionSummary) is obtained on an elided line 5835.
5836
5837    if (FS->allocs().empty() && FS->callsites().empty())
5838      continue;
5839
// SI/AI walk the summary callsite/alloc records in lockstep with the IR.
5840    auto SI = FS->callsites().begin();
5841    auto AI = FS->allocs().begin();
5842
5843
5844
5845
// Trailing summary callsites with no stack ids correspond to calls whose
// profiled frames were discovered through tail calls; index them by callee.
5846    DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5847
5848
5849    for (auto CallsiteIt = FS->callsites().rbegin();
5850         CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
5851      auto &Callsite = *CallsiteIt;
5852
5853
5854
5855      if (!Callsite.StackIdIndices.empty())
5856        break;
5857      MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
5858    }
5859
5860
5862
5863
5864
// Walk each instruction, matching IR calls/allocations against the
// summary records and applying cloning / attribute decisions.
5865
5866    for (auto &BB : F) {
5867      for (auto &I : BB) {
5869
5871          continue;
5872
5873        auto *CalledValue = CB->getCalledOperand();
5874        auto *CalledFunction = CB->getCalledFunction();
5875        if (CalledValue && !CalledFunction) {
5876          CalledValue = CalledValue->stripPointerCasts();
5877
5879        }
5880
// Calls through an alias reach here with a null called function.
5881
5883          assert(!CalledFunction &&
5884                 "Expected null called function in callsite for alias");
5886        }
5887
5888        CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5889            I.getMetadata(LLVMContext::MD_callsite));
5890        auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
5891
5892
// Allocation already carrying a single "memprof" attribute (and no MIB
// metadata): just update statistics, no cloning possible.
5893
5894
5895
5896        if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) {
5897          CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
5898              ? AllocTypeColdThinBackend++
5899              : AllocTypeNotColdThinBackend++;
5900          OrigAllocsThinBackend++;
5901          AllocVersionsThinBackend++;
5902          if (!MaxAllocVersionsThinBackend)
5903            MaxAllocVersionsThinBackend = 1;
5904          continue;
5905        }
5906
5907        if (MemProfMD) {
// Allocation with MIB metadata: consume the next summary alloc record.
5908
5909          assert(AI != FS->allocs().end());
5910          auto &AllocNode = *(AI++);
5911
5912#ifndef NDEBUG
5914                               ImportSummary);
5915#endif
5916
5917
5918          CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5919
5920          OrigAllocsThinBackend++;
5921          AllocVersionsThinBackend += AllocNode.Versions.size();
5922          if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5923            MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5924
5925
5926
5927
5928
5929
5930
5931
// Single non-cold version: the allocation stays ambiguous/notcold and
// is counted as unclonable; metadata is presumably handled on the
// elided lines — TODO confirm against upstream source.
5932
5933          if (AllocNode.Versions.size() == 1 &&
5934              (AllocationType)AllocNode.Versions[0] != AllocationType::Cold) {
5936                       AllocationType::NotCold ||
5938                       AllocationType::None);
5939            UnclonableAllocsThinBackend++;
5940            continue;
5941          }
5942
// (Elided assertion context) No version may still be marked both
// NotCold and Cold at this point.
5943
5945            return Type == ((uint8_t)AllocationType::NotCold |
5946                            (uint8_t)AllocationType::Cold);
5947          }));
5948
5949
// Attach the allocation-type attribute to each surviving clone's copy.
5950          for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5951
5952
5953            if (J > 0 && VMaps[J - 1]->empty())
5954              continue;
5955
5956            if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5957              continue;
5959            AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5960                                            : AllocTypeNotColdThinBackend++;
5963                                    AllocTypeString);
5964            CallBase *CBClone;
5965
5966            if (!J)
5967              CBClone = CB;
5968            else
5969
5970
5971
5975            ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
5976                     << ore::NV("AllocationCall", CBClone) << " in clone "
5978                     << " marked with memprof allocation attribute "
5979                     << ore::NV("Attribute", AllocTypeString));
5980          }
5981        } else if (!CallsiteContext.empty()) {
// Non-allocation callsite with callsite metadata.
5982          if (!CalledFunction) {
5983#ifndef NDEBUG
5984
5986            assert(!CI || !CI->isInlineAsm());
5987#endif
5988
// Indirect call: record ICP candidates; actual promotion happens in
// performICP after all callsites are visited.
5990
5991
5992
5993
5994
5995            auto NumClones =
5996                recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);
5997
5998
5999
6000
6001            if (NumClones)
6002              CloneFuncIfNeeded(NumClones, FS);
6003          }
6004
6005          else {
// Direct call: consume the next summary callsite record and verify
// (in debug builds) that its stack ids match the IR callsite context.
6006
6007            assert(SI != FS->callsites().end());
6008            auto &StackNode = *(SI++);
6009
6010#ifndef NDEBUG
6011
6012
6014            for (auto StackId : CallsiteContext) {
6016              assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6017                     StackId);
6018              StackIdIndexIter++;
6019            }
6020#endif
6021
6022            CloneCallsite(StackNode, CB, CalledFunction, FS);
6023          }
6024        } else if (CB->isTailCall() && CalledFunction) {
// Tail call with no metadata: match it against the tail-call-derived
// summary callsites collected above, keyed by callee ValueInfo.
6025
6026
6027          ValueInfo CalleeVI =
6029          if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
6030            auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
6031            assert(Callsite != MapTailCallCalleeVIToCallsite.end());
6032            CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6033          }
6034        }
6035      }
6036    }
6037
// Promote the indirect calls recorded for this function (all clones).
6038
6039    performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6040  }
6041
// Second pass: drop memprof/callsite metadata everywhere, now that all
// cloning and attribute decisions have been applied.
6042
6043
6044  for (auto &F : M) {
6045
6046
6048      continue;
6049    for (auto &BB : F) {
6050      for (auto &I : BB) {
6052          continue;
6053        I.setMetadata(LLVMContext::MD_memprof, nullptr);
6054        I.setMetadata(LLVMContext::MD_callsite, nullptr);
6055      }
6056    }
6057  }
6058
// Return value is on an elided line (6059); presumably returns true here.
6060}
6061
// Examine an indirect callsite CB and, if value profiling reports promotion
// candidates, consume the matching run of summary callsite records (one per
// candidate, advancing SI) and record the work needed for later promotion in
// ICallAnalysisInfo. Returns the number of function clones implied by the
// candidates' records (0 if there is no candidate profile data).
// NOTE(review): the parameter list (doxygen lines 6063-6065) and two
// statements inside the loop were lost in this listing extraction.
6062unsigned MemProfContextDisambiguation::recordICPInfo(
6066
6067  uint32_t NumCandidates;
6068  uint64_t TotalCount;
// Query the value-profile-based promotion candidates for this callsite.
6069  auto CandidateProfileData =
6070      ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
6071                                                          NumCandidates);
6072  if (CandidateProfileData.empty())
6073    return 0;
6074
6075
6076
6077
6078  bool ICPNeeded = false;
6079  unsigned NumClones = 0;
// Remember where this callsite's records start so performICP can find them.
6080  size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
6081  for (const auto &Candidate : CandidateProfileData) {
6082#ifndef NDEBUG
6083    auto CalleeValueInfo =
6084#endif
6085        ImportSummary->getValueInfo(Candidate.Value);
6086
// Candidate order must match the summary records' callee order.
6087
6088    assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
6089    assert(SI != AllCallsites.end());
6090    auto &StackNode = *(SI++);
6091
// ICP is needed if any record maps some clone to a non-zero callee clone
// (the ICPNeeded update itself is on an elided line 6094).
6092
6093
6095                           [](unsigned CloneNo) { return CloneNo != 0; });
6096
// All records for one callsite should agree on the clone count
// (assignment context partially elided at line 6098).
6097
6099    NumClones = StackNode.Clones.size();
6100  }
6101  if (!ICPNeeded)
6102    return NumClones;
6103
// Save everything performICP needs to redo the promotion per clone.
6104
6105  ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
6106                               TotalCount, CallsiteInfoStartIndex});
6107  return NumClones;
6108}
6109
// Perform the indirect call promotions recorded by recordICPInfo: for each
// saved callsite, promote each viable candidate in every function clone,
// pointing the promoted direct call at the candidate's assigned callee clone,
// then adjust the residual value-profile metadata on the remaining indirect
// call. Emits optimization remarks for successes and failures.
// NOTE(review): several parameter and statement lines are missing from this
// listing (gaps in the embedded numbering); comments reflect visible code only.
6110void MemProfContextDisambiguation::performICP(
6112    ArrayRef<std::unique_ptr> VMaps,
6114    OptimizationRemarkEmitter &ORE) {
6115
6116
6117
6118
6119
6120
6121  for (auto &Info : ICallAnalysisInfo) {
6122    auto *CB = Info.CB;
// CallsiteIndex is initialized on an elided line (6123), presumably from
// Info.CallsiteInfoStartIndex — TODO confirm against upstream source.
6124    auto TotalCount = Info.TotalCount;
6125    unsigned NumPromoted = 0;
6126    unsigned NumClones = 0;
6127
6128    for (auto &Candidate : Info.CandidateProfileData) {
6129      auto &StackNode = AllCallsites[CallsiteIndex++];
6130
6131
6133      NumClones = StackNode.Clones.size();
6134
// Resolve the candidate's target function from the profile symtab; a
// null (or otherwise rejected — condition partially elided) target
// cannot be promoted.
6135
6136
6137
6138
6139      Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6140      if (TargetFunction == nullptr ||
6141
6142
6143
6144
6147        ORE.emit([&]() {
6148          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)
6149                 << "Memprof cannot promote indirect call: target with md5sum "
6150                 << ore::NV("target md5sum", Candidate.Value) << " not found";
6151        });
6152
6153
6154
6155        continue;
6156      }
6157
// Legality check (the isLegalToPromote call is on an elided line 6160).
6158
6159      const char *Reason = nullptr;
6161        ORE.emit([&]() {
6162          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
6163                 << "Memprof cannot promote indirect call to "
6164                 << ore::NV("TargetFunction", TargetFunction)
6165                 << " with count of " << ore::NV("TotalCount", TotalCount)
6166                 << ": " << Reason;
6167        });
6168        continue;
6169      }
6170
// Promote this candidate in the original function and in every clone.
6172
6173
6174
6175
6176      CallBase *CBClone = CB;
6177      for (unsigned J = 0; J < NumClones; J++) {
6178
// Skip clones elided as duplicates (marked by an empty VMap).
6179
6180        if (J > 0 && VMaps[J - 1]->empty())
6181          continue;
6182
// For J > 0, CBClone is remapped through the clone's VMap on an elided
// line (6184).
6183        if (J > 0)
6185
// promoteIndirectCall produces the guarded direct call (call is on
// elided lines 6188-6189, yielding DirectCall).
6186
6187
6190                                      TotalCount, isSamplePGO, &ORE);
6191        auto *TargetToUse = TargetFunction;
6192
// A non-zero clone number in the record redirects the direct call to
// the target's memprof clone.
6193
6194        if (StackNode.Clones[J]) {
6195          TargetToUse =
6198                                            StackNode.Clones[J]),
6200                  .getCallee());
6201        }
6202        DirectCall.setCalledFunction(TargetToUse);
6203
6204
6205
6206
6207
6208
6212        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
6213                 << ore::NV("Call", CBClone) << " in clone "
6215                 << " promoted and assigned to call function clone "
6216                 << ore::NV("Callee", TargetToUse));
6217      }
6218
// Deduct this candidate's count from the remaining indirect-call total.
6219
6220      TotalCount -= Candidate.Count;
6221      NumPromoted++;
6222    }
6223
// Fix up the residual indirect call in every clone: drop the stale prof
// metadata and re-annotate with the unpromoted candidates, if any remain.
6224
6225    CallBase *CBClone = CB;
6226    for (unsigned J = 0; J < NumClones; J++) {
6227
6228
6229      if (J > 0 && VMaps[J - 1]->empty())
6230        continue;
6231
6232      if (J > 0)
6234
6235      CBClone->setMetadata(LLVMContext::MD_prof, nullptr);
6236
6237
6238      if (TotalCount != 0)
6240            M, *CBClone, ArrayRef(Info.CandidateProfileData).slice(NumPromoted),
6241            TotalCount, IPVK_IndirectCallTarget, Info.NumCandidates);
6242    }
6243  }
6244}
6245
// Top-level driver for the callsite context graph: optionally dump/export/
// verify the graph, identify the needed clones, then assign function clones.
// Returns whether the underlying module/index was changed (via
// assignFunctions; the exact return expression is on an elided line 6284).
// NOTE(review): the if-conditions guarding each dump/export/check call
// (DumpCCG, ExportToDot, VerifyCCG options per the index below) are on lines
// elided from this listing.
6246template <typename DerivedCCG, typename FuncTy, typename CallTy>
6247bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
// Dump the graph as built, before any cloning.
6249    dbgs() << "CCG before cloning:\n";
6250    dbgs() << *this;
6251  }
6253    exportToDot("postbuild");
6254
6256    check();
6257  }
6258
// Decide which context nodes need clones.
6259  identifyClones();
6260
6262    check();
6263  }
6264
6266    dbgs() << "CCG after cloning:\n";
6267    dbgs() << *this;
6268  }
6270    exportToDot("cloned");
6271
// Materialize the clone assignment onto functions.
6272  bool Changed = assignFunctions();
6273
6275    dbgs() << "CCG after assigning function clones:\n";
6276    dbgs() << *this;
6277  }
6279    exportToDot("clonefuncassign");
6280
6282    printTotalSizes(errs());
6283
6285}
6286
// Entry point for the per-module pass: when running as a ThinLTO backend
// (an import summary is present) apply the summary's decisions via
// applyImport; otherwise build a ModuleCallsiteContextGraph for this module
// and run the full analysis/cloning pipeline on it.
// Returns true if the module was changed.
// NOTE(review): the first parameter line (doxygen 6288, presumably Module &M)
// and the guard before the early "return false" (line 6304) are missing from
// this listing.
6287bool MemProfContextDisambiguation::processModule(
6289    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6290
// ThinLTO backend path: decisions were made at thin-link time.
6291
6292
6293  if (ImportSummary)
6294    return applyImport(M);
6295
6296
6297
6298
6299
6300
6301
6302
6303
6305    return false;
6306
// Whole-program (regular LTO / standalone) path: analyze this module.
6307  ModuleCallsiteContextGraph CCG(M, OREGetter);
6308  return CCG.process();
6309}
6310
6313 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6314
6315
6318 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6322 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6326 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6327 "-memprof-dot-context-id");
6328 if (ImportSummary) {
6329
6330
6331
6333 return;
6334 }
6336 return;
6337
6338 auto ReadSummaryFile =
6340 if (!ReadSummaryFile) {
6343 "': ");
6344 return;
6345 }
6347 if (!ImportSummaryForTestingOrErr) {
6350 "': ");
6351 return;
6352 }
6353 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6354 ImportSummary = ImportSummaryForTesting.get();
6355}
6356
6362 };
6363 if (!processModule(M, OREGetter))
6366}
6367
6371 isPrevailing) {
6372
6373
6374
6375
6378 return;
6379
6380 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6381 CCG.process();
6382}
6383
6384
6385
6386
6387
6389
6390
6391
6392
6393
6395 for (auto &F : M) {
6396 for (auto &BB : F) {
6397 for (auto &I : BB) {
6399 if (!CI)
6400 continue;
6401 if (CI->hasFnAttr("memprof")) {
6402 CI->removeFnAttr("memprof");
6404 }
6405 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6406 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6407 continue;
6408 }
6409
6410
6411
6412 CI->setMetadata(LLVMContext::MD_memprof, nullptr);
6413 CI->setMetadata(LLVMContext::MD_callsite, nullptr);
6415 }
6416 }
6417 }
6421}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
Definition MemProfContextDisambiguation.cpp:5425
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
Definition MemProfContextDisambiguation.cpp:2298
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
Definition MemProfContextDisambiguation.cpp:1634
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
Definition MemProfContextDisambiguation.cpp:3895
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
Definition MemProfContextDisambiguation.cpp:5570
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
Definition MemProfContextDisambiguation.cpp:1407
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
Definition MemProfContextDisambiguation.cpp:3540
DotScope
Definition MemProfContextDisambiguation.cpp:130
@ Context
Definition MemProfContextDisambiguation.cpp:133
@ Alloc
Definition MemProfContextDisambiguation.cpp:132
@ All
Definition MemProfContextDisambiguation.cpp:131
static unsigned getMemProfCloneNum(const Function &F)
Definition MemProfContextDisambiguation.cpp:2305
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
Definition MemProfContextDisambiguation.cpp:5457
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
Definition MemProfContextDisambiguation.cpp:1643
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
Definition MemProfContextDisambiguation.cpp:2290
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
Definition MemProfContextDisambiguation.cpp:2288
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
Definition MemProfContextDisambiguation.cpp:4305
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
Definition MemProfContextDisambiguation.cpp:5660
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
Definition MemProfContextDisambiguation.cpp:6311
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition MemProfContextDisambiguation.cpp:6357
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition MemProfContextDisambiguation.cpp:6388
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
Definition MemProfContextDisambiguation.cpp:228
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr to an Expected.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
Definition MemProfContextDisambiguation.cpp:3382
typename GTraits::NodeRef NodeRef
Definition MemProfContextDisambiguation.cpp:3395
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
Definition MemProfContextDisambiguation.cpp:3450
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
Definition MemProfContextDisambiguation.cpp:3393
typename GTraits::ChildIteratorType ChildIteratorType
Definition MemProfContextDisambiguation.cpp:3396
static std::string getNodeAttributes(NodeRef Node, GraphType G)
Definition MemProfContextDisambiguation.cpp:3419
static bool isNodeHidden(NodeRef Node, GraphType G)
Definition MemProfContextDisambiguation.cpp:3483
static std::string getNodeLabel(NodeRef Node, GraphType G)
Definition MemProfContextDisambiguation.cpp:3398
GraphTraits< GraphType > GTraits
Definition MemProfContextDisambiguation.cpp:3394
static NodeRef getNode(const NodePtrTy &P)
Definition MemProfContextDisambiguation.cpp:3341
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
Definition MemProfContextDisambiguation.cpp:3361
static ChildIteratorType child_end(NodeRef N)
Definition MemProfContextDisambiguation.cpp:3374
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
Definition MemProfContextDisambiguation.cpp:3340
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
Definition MemProfContextDisambiguation.cpp:3365
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
Definition MemProfContextDisambiguation.cpp:3337
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
Definition MemProfContextDisambiguation.cpp:3338
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
Definition MemProfContextDisambiguation.cpp:3343
static ChildIteratorType child_begin(NodeRef N)
Definition MemProfContextDisambiguation.cpp:3370
static NodeRef getEntryNode(GraphType G)
Definition MemProfContextDisambiguation.cpp:3355
static nodes_iterator nodes_begin(GraphType G)
Definition MemProfContextDisambiguation.cpp:3347
static nodes_iterator nodes_end(GraphType G)
Definition MemProfContextDisambiguation.cpp:3351
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Definition MemProfContextDisambiguation.cpp:3359
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
Definition MemProfContextDisambiguation.cpp:1024
static SimpleType getSimplifiedValue(IndexCall &Val)
Definition MemProfContextDisambiguation.cpp:1025
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
Definition MemProfContextDisambiguation.cpp:1028
static SimpleType getSimplifiedValue(const IndexCall &Val)
Definition MemProfContextDisambiguation.cpp:1029
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...