LLVM: lib/Transforms/IPO/MemProfContextDisambiguation.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

50#include

51#include

52#include <unordered_map>

53#include

54using namespace llvm;

56

57#define DEBUG_TYPE "memprof-context-disambiguation"

58

60 "Number of function clones created during whole program analysis");

62 "Number of function clones created during ThinLTO backend");

64 "Number of functions that had clones created during ThinLTO backend");

66 FunctionCloneDuplicatesThinBackend,

67 "Number of function clone duplicates detected during ThinLTO backend");

68STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "

69 "cloned) during whole program analysis");

70STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "

71 "during whole program analysis");

73 "Number of not cold static allocations (possibly cloned) during "

74 "ThinLTO backend");

75STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "

76 "(possibly cloned) during ThinLTO backend");

78 "Number of original (not cloned) allocations with memprof profiles "

79 "during ThinLTO backend");

81 AllocVersionsThinBackend,

82 "Number of allocation versions (including clones) during ThinLTO backend");

84 "Maximum number of allocation versions created for an original "

85 "allocation during ThinLTO backend");

87 "Number of unclonable ambigous allocations during ThinLTO backend");

88STATISTIC(RemovedEdgesWithMismatchedCallees,

89 "Number of edges removed due to mismatched callees (profiled vs IR)");

91 "Number of profiled callees found via tail calls");

93 "Aggregate depth of profiled callees found via tail calls");

95 "Maximum depth of profiled callees found via tail calls");

96STATISTIC(FoundProfiledCalleeNonUniquelyCount,

97 "Number of profiled callees found via multiple tail call chains");

98STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");

99STATISTIC(NewMergedNodes, "Number of new nodes created during merging");

100STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging");

102 "Number of missing alloc nodes for context ids");

104 "Number of calls skipped during cloning due to unexpected operand");

106 "Number of callsites assigned to call multiple non-matching clones");

107STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes");

108STATISTIC(TotalMergeIters, "Number of merge iterations for nodes");

109STATISTIC(MaxMergeIters, "Max merge iterations for nodes");

110STATISTIC(NumImportantContextIds, "Number of important context ids");

111STATISTIC(NumFixupEdgeIdsInserted, "Number of fixup edge ids inserted");

112STATISTIC(NumFixupEdgesAdded, "Number of fixup edges added");

113STATISTIC(NumFixedContexts, "Number of contexts with fixed edges");

114

118 cl::desc("Specify the path prefix of the MemProf dot files."));

119

122 cl::desc("Export graph to dot files."));

123

124

127 cl::desc("Iteratively apply merging on a node to catch new callers"));

128

129

135

137 "memprof-dot-scope", cl::desc("Scope of graph to export to dot"),

142 "Export only nodes with contexts feeding given "

143 "-memprof-dot-alloc-id"),

145 "Export only nodes with given -memprof-dot-context-id")));

146

149 cl::desc("Id of alloc to export if -memprof-dot-scope=alloc "

150 "or to highlight if -memprof-dot-scope=all"));

151

154 cl::desc("Id of context to export if -memprof-dot-scope=context or to "

155 "highlight otherwise"));

156

159 cl::desc("Dump CallingContextGraph to stdout after each stage."));

160

163 cl::desc("Perform verification checks on CallingContextGraph."));

164

167 cl::desc("Perform frequent verification checks on nodes."));

168

170 "memprof-import-summary",

171 cl::desc("Import summary to use for testing the ThinLTO backend via opt"),

173

177 cl::desc("Max depth to recursively search for missing "

178 "frames through tail calls."));

179

180

183 cl::desc("Allow cloning of callsites involved in recursive cycles"));

184

187 cl::desc("Allow cloning of contexts through recursive cycles"));

188

189

190

191

194 cl::desc("Merge clones before assigning functions"));

195

196

197

198

199

200

203 cl::desc("Allow cloning of contexts having recursive cycles"));

204

205

206

209 cl::desc("Minimum absolute count for promoted target to be inlinable"));

210

211namespace llvm {

213 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,

215

216

217

220 cl::desc("Linking with hot/cold operator new interfaces"));

221

223 "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,

225 "Require target function definition when promoting indirect calls"));

226

229

232 cl::desc("Number of largest cold contexts to consider important"));

233

236 cl::desc("Enables edge fixup for important contexts"));

237

238}

239

240namespace {

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256template <typename DerivedCCG, typename FuncTy, typename CallTy>

257class CallsiteContextGraph {

258public:

259 CallsiteContextGraph() = default;

260 CallsiteContextGraph(const CallsiteContextGraph &) = default;

261 CallsiteContextGraph(CallsiteContextGraph &&) = default;

262

263

264 bool process();

265

266

267

268 void identifyClones();

269

270

271

272

273

274

275 bool assignFunctions();

276

277 void dump() const;

279 void printTotalSizes(raw_ostream &OS) const;

280

282 const CallsiteContextGraph &CCG) {

283 CCG.print(OS);

284 return OS;

285 }

286

287 friend struct GraphTraits<

288 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;

289 friend struct DOTGraphTraits<

290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;

291

292 void exportToDot(std::string Label) const;

293

294

295 struct FuncInfo final

296 : public std::pair<FuncTy *, unsigned > {

297 using Base = std::pair<FuncTy *, unsigned>;

298 FuncInfo(const Base &B) : Base(B) {}

299 FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}

300 explicit operator bool() const { return this->first != nullptr; }

301 FuncTy *func() const { return this->first; }

302 unsigned cloneNo() const { return this->second; }

303 };

304

305

306 struct CallInfo final : public std::pair<CallTy, unsigned > {

307 using Base = std::pair<CallTy, unsigned>;

308 CallInfo(const Base &B) : Base(B) {}

309 CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)

310 : Base(Call, CloneNo) {}

311 explicit operator bool() const { return (bool)this->first; }

312 CallTy call() const { return this->first; }

313 unsigned cloneNo() const { return this->second; }

314 void setCloneNo(unsigned N) { this->second = N; }

315 void print(raw_ostream &OS) const {

316 if (!operator bool()) {

318 OS << "null Call";

319 return;

320 }

321 call()->print(OS);

322 OS << "\t(clone " << cloneNo() << ")";

323 }

324 void dump() const {

326 dbgs() << "\n";

327 }

328 friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {

330 return OS;

331 }

332 };

333

334 struct ContextEdge;

335

336

337 struct ContextNode {

338

339 unsigned NodeId = 0;

340

341

342

343

344 bool IsAllocation;

345

346

347

348 bool Recursive = false;

349

350

351

352 uint8_t AllocTypes = 0;

353

354

355

356 CallInfo Call;

357

358

359

360

361

363

364

365

366

367

368

369

370

371 uint64_t OrigStackOrAllocId = 0;

372

373

374

375 std::vector<std::shared_ptr> CalleeEdges;

376

377

378

379 std::vector<std::shared_ptr> CallerEdges;

380

381

382

383 bool useCallerEdgesForContextInfo() const {

384

385

386

387

388 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||

390

391

392

393

394

396 }

397

398

399

400 DenseSet<uint32_t> getContextIds() const {

401 unsigned Count = 0;

402

403

404

405

406 for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)

407 Count += Edge->getContextIds().size();

408 DenseSet<uint32_t> ContextIds;

411 CalleeEdges, useCallerEdgesForContextInfo()

412 ? CallerEdges

413 : std::vector<std::shared_ptr>());

414 for (const auto &Edge : Edges)

416 return ContextIds;

417 }

418

419

420

421 uint8_t computeAllocType() const {

422 uint8_t BothTypes =

423 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

424 uint8_t AllocType = (uint8_t)AllocationType::None;

426 CalleeEdges, useCallerEdgesForContextInfo()

427 ? CallerEdges

428 : std::vector<std::shared_ptr>());

429 for (const auto &Edge : Edges) {

431

434 }

436 }

437

438

439

440 bool emptyContextIds() const {

442 CalleeEdges, useCallerEdgesForContextInfo()

443 ? CallerEdges

444 : std::vector<std::shared_ptr>());

445 for (const auto &Edge : Edges) {

446 if (Edge->getContextIds().empty())

447 return false;

448 }

449 return true;

450 }

451

452

453 std::vector<ContextNode *> Clones;

454

455

456 ContextNode *CloneOf = nullptr;

457

458 ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}

459

460 ContextNode(bool IsAllocation, CallInfo C)

461 : IsAllocation(IsAllocation), Call(C) {}

462

463 void addClone(ContextNode *Clone) {

464 if (CloneOf) {

465 CloneOf->Clones.push_back(Clone);

466 Clone->CloneOf = CloneOf;

467 } else {

468 Clones.push_back(Clone);

469 assert(!Clone->CloneOf);

470 Clone->CloneOf = this;

471 }

472 }

473

474 ContextNode *getOrigNode() {

475 if (!CloneOf)

476 return this;

477 return CloneOf;

478 }

479

481 unsigned int ContextId);

482

483 ContextEdge *findEdgeFromCallee(const ContextNode *Callee);

484 ContextEdge *findEdgeFromCaller(const ContextNode *Caller);

485 void eraseCalleeEdge(const ContextEdge *Edge);

486 void eraseCallerEdge(const ContextEdge *Edge);

487

488 void setCall(CallInfo C) { Call = C; }

489

490 bool hasCall() const { return (bool)Call.call(); }

491

492 void printCall(raw_ostream &OS) const { Call.print(OS); }

493

494

495

496 bool isRemoved() const {

497

498

499

500

502 (AllocTypes == (uint8_t)AllocationType::None) ==

503 emptyContextIds());

504 return AllocTypes == (uint8_t)AllocationType::None;

505 }

506

507 void dump() const;

508 void print(raw_ostream &OS) const;

509

510 friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {

511 Node.print(OS);

512 return OS;

513 }

514 };

515

516

517

518 struct ContextEdge {

519 ContextNode *Callee;

520 ContextNode *Caller;

521

522

523

524 uint8_t AllocTypes = 0;

525

526

527

528

529

530

531

532 bool IsBackedge = false;

533

534

535 DenseSet<uint32_t> ContextIds;

536

537 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,

538 DenseSet<uint32_t> ContextIds)

539 : Callee(Callee), Caller(Caller), AllocTypes(AllocType),

540 ContextIds(std::move(ContextIds)) {}

541

542 DenseSet<uint32_t> &getContextIds() { return ContextIds; }

543

544

545

546 inline void clear() {

547 ContextIds.clear();

548 AllocTypes = (uint8_t)AllocationType::None;

549 Caller = nullptr;

550 Callee = nullptr;

551 }

552

553

554

555

556 inline bool isRemoved() const {

557 if (Callee || Caller)

558 return false;

559

560

561

562 assert(AllocTypes == (uint8_t)AllocationType::None);

563 assert(ContextIds.empty());

564 return true;

565 }

566

567 void dump() const;

568 void print(raw_ostream &OS) const;

569

570 friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {

571 Edge.print(OS);

572 return OS;

573 }

574 };

575

576

577

578 void removeNoneTypeCalleeEdges(ContextNode *Node);

579 void removeNoneTypeCallerEdges(ContextNode *Node);

580 void

581 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,

582 DenseSet<const ContextNode *> &Visited);

583

584protected:

585

586

587 template <class NodeT, class IteratorT>

588 std::vector<uint64_t>

589 getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);

590

591

592

593 ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);

594

595

596 template <class NodeT, class IteratorT>

597 void addStackNodesForMIB(

598 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,

601 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);

602

603

604

605

606 void updateStackNodes();

607

608

609

610

611

612

613

614

615 void fixupImportantContexts();

616

617

618

619 void handleCallsitesWithMultipleTargets();

620

621

622 void markBackedges();

623

624

625

626 void mergeClones();

627

628

629

630

631

632 bool partitionCallsByCallee(

634 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);

635

636

637

638 MapVector<FuncTy *, std::vector> FuncToCallsWithMetadata;

639

640

641 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

642

643

644

645 DenseSet<uint32_t> DotAllocContextIds;

646

647private:

648 using EdgeIter = typename std::vector<std::shared_ptr>::iterator;

649

650

651

652

653 struct CallContextInfo {

654

655 CallTy Call;

656

657 std::vector<uint64_t> StackIds;

658

659 const FuncTy *Func;

660

661

662 DenseSet<uint32_t> ContextIds;

663 };

664

665

666

667

668

669

670

671 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,

672 bool CalleeIter = true);

673

674

675

676

677

678

679

680 void assignStackNodesPostOrder(

681 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

682 DenseMap<uint64_t, std::vector> &StackIdToMatchingCalls,

683 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,

684 const DenseSet<uint32_t> &ImportantContextIds);

685

686

687

688

689 DenseSet<uint32_t> duplicateContextIds(

690 const DenseSet<uint32_t> &StackSequenceContextIds,

691 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

692

693

694 void propagateDuplicateContextIds(

695 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

696

697

698

699

700 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,

701 bool TowardsCallee,

702 DenseSet<uint32_t> RemainingContextIds);

703

704

705

706

707 uint64_t getStackId(uint64_t IdOrIndex) const {

708 return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);

709 }

710

711

712

713

714

715

716

717 bool

718 calleesMatch(CallTy Call, EdgeIter &EI,

719 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);

720

721

722

723 const FuncTy *getCalleeFunc(CallTy Call) {

724 return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);

725 }

726

727

728

729

730 bool calleeMatchesFunc(

731 CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,

732 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {

733 return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(

734 Call, Func, CallerFunc, FoundCalleeChain);

735 }

736

737

738 bool sameCallee(CallTy Call1, CallTy Call2) {

739 return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);

740 }

741

742

743

744 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {

745 return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(

747 }

748

749

750 uint64_t getLastStackId(CallTy Call) {

751 return static_cast<DerivedCCG *>(this)->getLastStackId(Call);

752 }

753

754

756 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;

757 static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);

758 }

759

760

762 return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);

763 }

764

765

766

767 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {

768 static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);

769 }

770

771

772

773

774 FuncInfo cloneFunctionForCallsite(

775 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,

776 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {

777 return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(

778 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);

779 }

780

781

782

783 std::string getLabel(const FuncTy *Func, const CallTy Call,

784 unsigned CloneNo) const {

785 return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);

786 }

787

788

789 ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,

790 CallInfo C = CallInfo()) {

791 NodeOwner.push_back(std::make_unique(IsAllocation, C));

792 auto *NewNode = NodeOwner.back().get();

793 if (F)

794 NodeToCallingFunc[NewNode] = F;

795 NewNode->NodeId = NodeOwner.size();

796 return NewNode;

797 }

798

799

800 ContextNode *getNodeForInst(const CallInfo &C);

801 ContextNode *getNodeForAlloc(const CallInfo &C);

802 ContextNode *getNodeForStackId(uint64_t StackId);

803

804

805

806 uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds) const;

807

808

809

810

811 uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,

812 const DenseSet<uint32_t> &Node2Ids) const;

813

814

815

816 uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,

817 const DenseSet<uint32_t> &Node2Ids) const;

818

819

820

821

822

823 ContextNode *

824 moveEdgeToNewCalleeClone(const std::shared_ptr &Edge,

825 DenseSet<uint32_t> ContextIdsToMove = {});

826

827

828

829

830

831 void moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge,

832 ContextNode *NewCallee,

833 bool NewClone = false,

834 DenseSet<uint32_t> ContextIdsToMove = {});

835

836

837

838

839

840

841 void moveCalleeEdgeToNewCaller(const std::shared_ptr &Edge,

842 ContextNode *NewCaller);

843

844

845 void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,

846 DenseSet<const ContextNode *> &CurrentStack);

847

848

849 void

850 mergeClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,

851 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);

852

853 void mergeNodeCalleeClones(

854 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

855 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);

856

857

858 void findOtherCallersToShareMerge(

859 ContextNode *Node, std::vector<std::shared_ptr> &CalleeEdges,

860 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,

861 DenseSet<ContextNode *> &OtherCallersToShareMerge);

862

863

864

865

866

867 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,

868 const DenseSet<uint32_t> &AllocContextIds);

869

870

871 DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;

872

873

874

875

876

877 DenseMap<uint32_t, std::vector> ContextIdToContextSizeInfos;

878

879

880

881

882

883 DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;

884

885

886

887 struct ImportantContextInfo {

888

889 std::vector<uint64_t> StackIds;

890

891

892 unsigned MaxLength = 0;

893

894

895

896 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;

897 };

898

899

900 DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;

901

902

903

904

905 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *Node,

906

907

908

909 const DenseSet<uint32_t> &NodeContextIds,

910 const DenseSet<uint32_t> &ImportantContextIds) {

913 return;

914 }

915 DenseSet<uint32_t> Ids =

918 return;

919 auto Size = StackIds.size();

920 for (auto Id : Ids) {

921 auto &Entry = ImportantContextIdInfo[Id];

922 Entry.StackIdsToNode[StackIds] = Node;

923

926 }

927 }

928

929

930 MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;

931 MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;

932

933

934 std::vector<std::unique_ptr> NodeOwner;

935

936

937 void check() const;

938

939

940 unsigned int LastContextId = 0;

941};

942

943template <typename DerivedCCG, typename FuncTy, typename CallTy>

944using ContextNode =

945 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;

946template <typename DerivedCCG, typename FuncTy, typename CallTy>

947using ContextEdge =

948 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;

949template <typename DerivedCCG, typename FuncTy, typename CallTy>

950using FuncInfo =

951 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;

952template <typename DerivedCCG, typename FuncTy, typename CallTy>

953using CallInfo =

954 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;

955

956

957class ModuleCallsiteContextGraph

958 : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,

959 Instruction *> {

960public:

961 ModuleCallsiteContextGraph(

963 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);

964

965private:

966 friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,

968

969 uint64_t getStackId(uint64_t IdOrIndex) const;

970 const Function *getCalleeFunc(Instruction *Call);

971 bool calleeMatchesFunc(

972 Instruction *Call, const Function *Func, const Function *CallerFunc,

973 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);

974 bool sameCallee(Instruction *Call1, Instruction *Call2);

975 bool findProfiledCalleeThroughTailCalls(

976 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,

977 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,

978 bool &FoundMultipleCalleeChains);

979 uint64_t getLastStackId(Instruction *Call);

980 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);

983 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);

984 CallsiteContextGraph<ModuleCallsiteContextGraph, Function,

986 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,

987 DenseMap<CallInfo, CallInfo> &CallMap,

988 std::vector &CallsWithMetadataInFunc,

989 unsigned CloneNo);

990 std::string getLabel(const Function *Func, const Instruction *Call,

991 unsigned CloneNo) const;

992

994 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;

995};

996

997

998

999

1000struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {

1001 IndexCall() : PointerUnion() {}

1002 IndexCall(std::nullptr_t) : IndexCall() {}

1003 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}

1004 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}

1005 IndexCall(PointerUnion PT) : PointerUnion(PT) {}

1006

1007 IndexCall *operator->() { return this; }

1008

1009 void print(raw_ostream &OS) const {

1010 PointerUnion<CallsiteInfo *, AllocInfo *> Base = *this;

1012 OS << *AI;

1013 } else {

1016 OS << *CI;

1017 }

1018 }

1019};

1020}

1021

1022namespace llvm {

1031}

1032

1033namespace {

1034

1035class IndexCallsiteContextGraph

1036 : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,

1037 IndexCall> {

1038public:

1039 IndexCallsiteContextGraph(

1040 ModuleSummaryIndex &Index,

1041 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>

1042 isPrevailing);

1043

1044 ~IndexCallsiteContextGraph() {

1045

1046

1047

1048

1049 for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {

1050 auto *FS = I.first;

1051 for (auto &Callsite : I.second)

1052 FS->addCallsite(*Callsite.second);

1053 }

1054 }

1055

1056private:

1057 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,

1058 IndexCall>;

1059

1060 uint64_t getStackId(uint64_t IdOrIndex) const;

1061 const FunctionSummary *getCalleeFunc(IndexCall &Call);

1062 bool calleeMatchesFunc(

1063 IndexCall &Call, const FunctionSummary *Func,

1064 const FunctionSummary *CallerFunc,

1065 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);

1066 bool sameCallee(IndexCall &Call1, IndexCall &Call2);

1067 bool findProfiledCalleeThroughTailCalls(

1068 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,

1069 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,

1070 bool &FoundMultipleCalleeChains);

1071 uint64_t getLastStackId(IndexCall &Call);

1072 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);

1075 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);

1076 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,

1077 IndexCall>::FuncInfo

1078 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,

1079 DenseMap<CallInfo, CallInfo> &CallMap,

1080 std::vector &CallsWithMetadataInFunc,

1081 unsigned CloneNo);

1082 std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,

1083 unsigned CloneNo) const;

1084

1085

1086

1087 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;

1088

1089 const ModuleSummaryIndex &Index;

1090 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>

1091 isPrevailing;

1092

1093

1094

1095

1096

1097 std::unordered_map<FunctionSummary *,

1098 std::map<ValueInfo, std::unique_ptr>>

1099 FunctionCalleesToSynthesizedCallsiteInfos;

1100};

1101}

1102

1103template <>

1107template <>

1110 : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};

1111template <>

1113 : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};

1114

1115namespace {

1116

1117

1118

1119

1120

1121

1124 if (AllocTypes ==

1127 else

1129}

1130

1131

1132

1133

1134template <typename DerivedCCG, typename FuncTy, typename CallTy>

1135bool allocTypesMatch(

1136 const std::vector<uint8_t> &InAllocTypes,

1137 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>

1138 &Edges) {

1139

1140

1141 assert(InAllocTypes.size() == Edges.size());

1142 return std::equal(

1143 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),

1145 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {

1146

1147

1148

1149 if (l == (uint8_t)AllocationType::None ||

1150 r->AllocTypes == (uint8_t)AllocationType::None)

1151 return true;

1152 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);

1153 });

1154}

1155

1156

1157

1158

1159

1160

1161template <typename DerivedCCG, typename FuncTy, typename CallTy>

1162bool allocTypesMatchClone(

1163 const std::vector<uint8_t> &InAllocTypes,

1164 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {

1165 const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;

1167

1168

1169 assert(InAllocTypes.size() == Node->CalleeEdges.size());

1170

1172 EdgeCalleeMap;

1173 for (const auto &E : Clone->CalleeEdges) {

1175 EdgeCalleeMap[E->Callee] = E->AllocTypes;

1176 }

1177

1178

1179 for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {

1180 auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);

1181

1182 if (Iter == EdgeCalleeMap.end())

1183 continue;

1184

1185

1186

1189 continue;

1190 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))

1191 return false;

1192 }

1193 return true;

1194}

1195

1196}

1197

1198template <typename DerivedCCG, typename FuncTy, typename CallTy>

1199typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *

1200CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(

1201 const CallInfo &C) {

1202 ContextNode *Node = getNodeForAlloc(C);

1204 return Node;

1205

1206 return NonAllocationCallToContextNodeMap.lookup(C);

1207}

1208

1209template <typename DerivedCCG, typename FuncTy, typename CallTy>

1210typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *

1211CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(

1212 const CallInfo &C) {

1213 return AllocationCallToContextNodeMap.lookup(C);

1214}

1215

1216template <typename DerivedCCG, typename FuncTy, typename CallTy>

1217typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *

1218CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(

1220 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);

1221 if (StackEntryNode != StackEntryIdToContextNodeMap.end())

1222 return StackEntryNode->second;

1223 return nullptr;

1224}

1225

1226template <typename DerivedCCG, typename FuncTy, typename CallTy>

1227void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::

1229 unsigned int ContextId) {

1230 for (auto &Edge : CallerEdges) {

1231 if (Edge->Caller == Caller) {

1233 Edge->getContextIds().insert(ContextId);

1234 return;

1235 }

1236 }

1237 std::shared_ptr Edge = std::make_shared(

1238 this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));

1239 CallerEdges.push_back(Edge);

1240 Caller->CalleeEdges.push_back(Edge);

1241}

1242

1243template <typename DerivedCCG, typename FuncTy, typename CallTy>

1244void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(

1245 ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {

1246 assert(!EI || (*EI)->get() == Edge);

1248

1249

1250

1251

1254

1255

1256

1257

1258 Edge->clear();

1259

1260#ifndef NDEBUG

1261 auto CalleeCallerCount = Callee->CallerEdges.size();

1262 auto CallerCalleeCount = Caller->CalleeEdges.size();

1263#endif

1264 if (!EI) {

1267 } else if (CalleeIter) {

1269 *EI = Caller->CalleeEdges.erase(*EI);

1270 } else {

1272 *EI = Callee->CallerEdges.erase(*EI);

1273 }

1274 assert(Callee->CallerEdges.size() < CalleeCallerCount);

1275 assert(Caller->CalleeEdges.size() < CallerCalleeCount);

1276}

1277

1278template <typename DerivedCCG, typename FuncTy, typename CallTy>

1279void CallsiteContextGraph<

1280 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {

1281 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {

1282 auto Edge = *EI;

1283 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {

1285 removeEdgeFromGraph(Edge.get(), &EI, true);

1286 } else

1287 ++EI;

1288 }

1289}

1290

1291template <typename DerivedCCG, typename FuncTy, typename CallTy>

1292void CallsiteContextGraph<

1293 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {

1294 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {

1295 auto Edge = *EI;

1296 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {

1298 Edge->Caller->eraseCalleeEdge(Edge.get());

1299 EI = Node->CallerEdges.erase(EI);

1300 } else

1301 ++EI;

1302 }

1303}

1304

1305template <typename DerivedCCG, typename FuncTy, typename CallTy>

1306typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *

1307CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::

1308 findEdgeFromCallee(const ContextNode *Callee) {

1309 for (const auto &Edge : CalleeEdges)

1310 if (Edge->Callee == Callee)

1311 return Edge.get();

1312 return nullptr;

1313}

1314

1315template <typename DerivedCCG, typename FuncTy, typename CallTy>

1316typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *

1317CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::

1318 findEdgeFromCaller(const ContextNode *Caller) {

1319 for (const auto &Edge : CallerEdges)

1320 if (Edge->Caller == Caller)

1321 return Edge.get();

1322 return nullptr;

1323}

1324

1325template <typename DerivedCCG, typename FuncTy, typename CallTy>

1326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::

1327 eraseCalleeEdge(const ContextEdge *Edge) {

1329 CalleeEdges, [Edge](const std::shared_ptr &CalleeEdge) {

1330 return CalleeEdge.get() == Edge;

1331 });

1332 assert(EI != CalleeEdges.end());

1333 CalleeEdges.erase(EI);

1334}

1335

1336template <typename DerivedCCG, typename FuncTy, typename CallTy>

1337void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::

1338 eraseCallerEdge(const ContextEdge *Edge) {

1340 CallerEdges, [Edge](const std::shared_ptr &CallerEdge) {

1341 return CallerEdge.get() == Edge;

1342 });

1343 assert(EI != CallerEdges.end());

1344 CallerEdges.erase(EI);

1345}

1346

1347template <typename DerivedCCG, typename FuncTy, typename CallTy>

1348uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(

1349 DenseSet<uint32_t> &ContextIds) const {

1350 uint8_t BothTypes =

1351 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

1352 uint8_t AllocType = (uint8_t)AllocationType::None;

1353 for (auto Id : ContextIds) {

1354 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);

1355

1358 }

1360}

1361

1362template <typename DerivedCCG, typename FuncTy, typename CallTy>

1363uint8_t

1364CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(

1365 const DenseSet<uint32_t> &Node1Ids,

1366 const DenseSet<uint32_t> &Node2Ids) const {

1367 uint8_t BothTypes =

1368 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

1369 uint8_t AllocType = (uint8_t)AllocationType::None;

1370 for (auto Id : Node1Ids) {

1371 if (!Node2Ids.count(Id))

1372 continue;

1373 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);

1374

1377 }

1379}

1380

1381template <typename DerivedCCG, typename FuncTy, typename CallTy>

1382uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(

1383 const DenseSet<uint32_t> &Node1Ids,

1384 const DenseSet<uint32_t> &Node2Ids) const {

1385 if (Node1Ids.size() < Node2Ids.size())

1386 return intersectAllocTypesImpl(Node1Ids, Node2Ids);

1387 else

1388 return intersectAllocTypesImpl(Node2Ids, Node1Ids);

1389}

1390

1391template <typename DerivedCCG, typename FuncTy, typename CallTy>

1392typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *

1393CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(

1394 CallInfo Call, const FuncTy *F) {

1396 ContextNode *AllocNode = createNewNode(true, F, Call);

1397 AllocationCallToContextNodeMap[Call] = AllocNode;

1398

1399 AllocNode->OrigStackOrAllocId = LastContextId;

1400

1401

1402 AllocNode->AllocTypes = (uint8_t)AllocationType::None;

1403

1404 return AllocNode;

1405}

1406

1408 if (!AllocTypes)

1409 return "None";

1410 std::string Str;

1412 Str += "NotCold";

1414 Str += "Cold";

1415 return Str;

1416}

1417

// NOTE(review): this definition was damaged by HTML extraction — the numeric
// prefixes on each line are doxygen line numbers, and several source lines
// are missing entirely (parameter declarations at orig. lines 1422-1423, the
// guard conditions at 1437/1443-1447, and the loop header at 1474). Restore
// from upstream LLVM before compiling. Purpose (from surviving code): adds a
// new context id for one MIB, records its alloc type and size info, tracks
// the top-N cold contexts as "important", and creates/updates stack nodes
// and caller edges for each frame of the MIB's stack context.
1418 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1419 template <class NodeT, class IteratorT>
1420 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1421 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1424 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1425
1426
// Treat Hot contexts as NotCold (Hot hinting is not supported here).
1427 if (AllocType == AllocationType::Hot)
1428 AllocType = AllocationType::NotCold;
1429
// Allocate a fresh context id for this MIB and record its alloc type.
1430 ContextIdToAllocationType[++LastContextId] = AllocType;
1431
1432 bool IsImportant = false;
1433 if (!ContextSizeInfo.empty()) {
1434 auto &Entry = ContextIdToContextSizeInfos[LastContextId];
1435
1436
// Sum total (cold) bytes for this context; used to maintain the top-N
// cold contexts map below. The enclosing guard (orig. line 1437) was
// lost in extraction — presumably it checks AllocType == Cold; confirm.
1438 uint64_t TotalCold = 0;
1439 for (auto &CSI : ContextSizeInfo)
1440 TotalCold += CSI.TotalSize;
1441
1442
1444
1445
// NOTE(review): the first half of this condition (orig. lines 1443-1445)
// was lost — presumably a size/threshold check on the top-N map; confirm.
1446 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1448
// Evict the currently-smallest cold context from the top-N tracking.
1449 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1450 TotalSizeToContextIdTopNCold.erase(
1451 TotalSizeToContextIdTopNCold.begin());
1452 assert(ImportantContextIdInfo.count(IdToRemove));
1453 ImportantContextIdInfo.erase(IdToRemove);
1454 }
1455 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1456 IsImportant = true;
1457 }
1458 }
1459 Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1460 }
1461
1462
// Update alloc type and context ids for this MIB.
1463 AllocNode->AllocTypes |= (uint8_t)AllocType;
1464
1465
1466
1467
1468 ContextNode *PrevNode = AllocNode;
1469
1470
1471
// Detect recursion by keeping track of stack ids seen in this context.
1472 SmallSet<uint64_t, 8> StackIdSet;
1473
// NOTE(review): the for-loop header (orig. line 1474) was lost; it iterates
// StackContext from the leaf frame outward via ContextIter.
1475 ContextIter != StackContext.end(); ++ContextIter) {
1476 auto StackId = getStackId(*ContextIter);
1477 if (IsImportant)
1478 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1479 ContextNode *StackNode = getNodeForStackId(StackId);
1480 if (!StackNode) {
1481 StackNode = createNewNode(false);
1482 StackEntryIdToContextNodeMap[StackId] = StackNode;
1483 StackNode->OrigStackOrAllocId = StackId;
1484 }
1485
1486
// A repeated stack id within one context marks the node recursive.
1488 auto Ins = StackIdSet.insert(StackId);
1489 if (!Ins.second)
1490 StackNode->Recursive = true;
1491 }
1492 StackNode->AllocTypes |= (uint8_t)AllocType;
1493 PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
1494 PrevNode = StackNode;
1495 }
1496}

1497

1498template <typename DerivedCCG, typename FuncTy, typename CallTy>

1499DenseSet<uint32_t>

1500CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(

1501 const DenseSet<uint32_t> &StackSequenceContextIds,

1502 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {

1503 DenseSet<uint32_t> NewContextIds;

1504 for (auto OldId : StackSequenceContextIds) {

1505 NewContextIds.insert(++LastContextId);

1506 OldToNewContextIds[OldId].insert(LastContextId);

1507 assert(ContextIdToAllocationType.count(OldId));

1508

1509 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];

1510 if (DotAllocContextIds.contains(OldId))

1511 DotAllocContextIds.insert(LastContextId);

1512 }

1513 return NewContextIds;

1514}

1515

1516template <typename DerivedCCG, typename FuncTy, typename CallTy>

1517void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::

1518 propagateDuplicateContextIds(

1519 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {

1520

1521 auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {

1522 DenseSet<uint32_t> NewIds;

1523 for (auto Id : ContextIds)

1524 if (auto NewId = OldToNewContextIds.find(Id);

1525 NewId != OldToNewContextIds.end())

1527 return NewIds;

1528 };

1529

1530

1531 auto UpdateCallers = [&](ContextNode *Node,

1532 DenseSet<const ContextEdge *> &Visited,

1533 auto &&UpdateCallers) -> void {

1534 for (const auto &Edge : Node->CallerEdges) {

1537 continue;

1538 ContextNode *NextNode = Edge->Caller;

1539 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());

1540

1541

1542 if (!NewIdsToAdd.empty()) {

1543 Edge->getContextIds().insert_range(NewIdsToAdd);

1544 UpdateCallers(NextNode, Visited, UpdateCallers);

1545 }

1546 }

1547 };

1548

1549 DenseSet<const ContextEdge *> Visited;

1550 for (auto &Entry : AllocationCallToContextNodeMap) {

1552 UpdateCallers(Node, Visited, UpdateCallers);

1553 }

1554}

1555

// NOTE(review): damaged by HTML extraction — the numeric prefixes are
// doxygen line numbers, and a few source lines are missing (notably the
// parameter comments at orig. lines 1559-1560 and the guard opening at 1566
// whose closing brace survives at 1576; likely `if (AllowRecursiveCallsites) {`
// — confirm upstream). Purpose: connect NewNode into the graph by moving the
// RemainingContextIds from OrigNode's callee (TowardsCallee) or caller edges
// onto new edges attached to NewNode, handling recursive context ids
// specially and removing any original edges left empty.
1556 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1557 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1558 ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
1559
1560
1561 DenseSet<uint32_t> RemainingContextIds) {
1562 auto &OrigEdges =
1563 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
// Ids that appear on more than one edge are recursive contexts.
1564 DenseSet<uint32_t> RecursiveContextIds;
1565 DenseSet<uint32_t> AllCallerContextIds;
1567
1568
1569
1570 for (auto &CE : OrigEdges) {
1571 AllCallerContextIds.reserve(CE->getContextIds().size());
1572 for (auto Id : CE->getContextIds())
1573 if (!AllCallerContextIds.insert(Id).second)
1574 RecursiveContextIds.insert(Id);
1575 }
1576 }
1577
1578 for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1579 auto Edge = *EI;
1580 DenseSet<uint32_t> NewEdgeContextIds;
1581 DenseSet<uint32_t> NotFoundContextIds;
1582
1583
1584
// Remove any ids still in RemainingContextIds from this edge, collecting
// the moved ids (NewEdgeContextIds) and those not found on this edge.
1585 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1586 NotFoundContextIds);
1587
1588
1589 if (RecursiveContextIds.empty()) {
1590
1591
1592 RemainingContextIds.swap(NotFoundContextIds);
1593 } else {
1594
1595
1596
1597
1598
1599
1600
1601
// With recursion, an id may legitimately appear on multiple edges, so only
// retire the non-recursive ids that this edge satisfied.
1602 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1603 set_difference(NewEdgeContextIds, RecursiveContextIds);
1604 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1605 }
1606
1607 if (NewEdgeContextIds.empty()) {
1608 ++EI;
1609 continue;
1610 }
1611 if (TowardsCallee) {
1612 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
// NOTE(review): make_shared's template argument (ContextEdge) was lost in
// extraction on the next line.
1613 auto NewEdge = std::make_shared(
1614 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1615 NewNode->CalleeEdges.push_back(NewEdge);
1616 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1617 } else {
1618 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1619 auto NewEdge = std::make_shared(
1620 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1621 NewNode->CallerEdges.push_back(NewEdge);
1622 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1623 }
1624
// Remove old edge if context ids empty.
1625 if (Edge->getContextIds().empty()) {
1626 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1627 continue;
1628 }
1629 ++EI;
1630 }
1631}

1632

1633template <typename DerivedCCG, typename FuncTy, typename CallTy>

1635 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {

1636

1637

1639 assert(!Edge->ContextIds.empty());

1640}

1641

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers, and several lines are missing (the DenseSet declarations at
// orig. lines 1654/1670, the range-for headers at 1658/1674, the asserts at
// 1665/1680, and the per-edge checks at 1689-1692). Purpose: debug-only
// consistency check that a node's context ids agree with the union of its
// caller edge ids and equal the union of its callee edge ids, optionally
// checking each edge too.
1642 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1643 static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1644 bool CheckEdges = true) {
1645 if (Node->isRemoved())
1646 return;
1647#ifndef NDEBUG
// Compute node's context ids once for use below.
1649 auto NodeContextIds = Node->getContextIds();
1650#endif
1651
1652
1653 if (Node->CallerEdges.size()) {
// NOTE(review): the CallerEdgeContextIds declaration (orig. line 1654) and
// the loop header over CallerEdges (orig. line 1658) were lost here.
1655 Node->CallerEdges.front()->ContextIds);
1657 if (CheckEdges)
1659 set_union(CallerEdgeContextIds, Edge->ContextIds);
1660 }
1661
1662
1663
1664
// Node ids should equal the caller-edge union, or be a superset of it
// (e.g. after context trimming for an inlined callee).
1666 NodeContextIds == CallerEdgeContextIds ||
1667 set_is_subset(CallerEdgeContextIds, NodeContextIds));
1668 }
1669 if (Node->CalleeEdges.size()) {
1671 Node->CalleeEdges.front()->ContextIds);
1673 if (CheckEdges)
1675 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1676 }
1677
1678
1679
1681 NodeContextIds == CalleeEdgeContextIds);
1682 }
1683
1684
1685
1686#ifndef NDEBUG
// NOTE(review): the per-callee-edge uniqueness check body (orig. lines
// 1689-1692) was lost; only the loop header survives.
1690 for (const auto &E : Node->CalleeEdges)
1693#endif
1694}

1695

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; comments and scattered statements are missing (e.g. the
// Visited.insert guard at orig. lines 1704-1705, the template argument of
// std::vector at 1700, and several asserts). Purpose: post-order traversal
// from allocation nodes that assigns each interior stack-sequence node to
// the matching profiled callsite calls, creating new nodes for calls that
// match a subsequence of stack ids and rewiring edges/context ids.
1696 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1697 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1698 assignStackNodesPostOrder(ContextNode *Node,
1699 DenseSet<const ContextNode *> &Visited,
1700 DenseMap<uint64_t, std::vector>
1701 &StackIdToMatchingCalls,
1702 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1703 const DenseSet<uint32_t> &ImportantContextIds) {
// NOTE(review): the Visited insertion/early-exit (orig. lines 1704-1705)
// preceding this return was lost in extraction.
1706 return;
1707
1708
1709
1710
// Make a copy since caller edges may be rewired during recursion below.
1711 auto CallerEdges = Node->CallerEdges;
1712 for (auto &Edge : CallerEdges) {
1713
1714 if (Edge->isRemoved()) {
1716 continue;
1717 }
1718 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1719 CallToMatchingCall, ImportantContextIds);
1720 }
1721
1722
1723
1724
1725
1726
1727
// Ignore alloc nodes and stack ids with no matching calls.
1728 if (Node->IsAllocation ||
1729 !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
1730 return;
1731
1732 auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
1733
1734
// Fast path: a single matching call with a single stack id maps directly
// onto this node.
1736 if (Calls.size() == 1) {
1737 auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
1738 if (Ids.size() == 1) {
1739 assert(SavedContextIds.empty());
1741 assert(Node == getNodeForStackId(Ids[0]));
1742 if (Node->Recursive)
1743 return;
1745 NonAllocationCallToContextNodeMap[Call] = Node;
1746 NodeToCallingFunc[Node] = Func;
1747 recordStackNode(Ids, Node, Node->getContextIds(), ImportantContextIds);
1748 return;
1749 }
1750 }
1751
1752#ifndef NDEBUG
1753
1754
1755 uint64_t LastId = Node->OrigStackOrAllocId;
1756 ContextNode *LastNode = getNodeForStackId(LastId);
1757
1759 assert(LastNode == Node);
1760#else
1761 ContextNode *LastNode = Node;
1762#endif
1763
1764
// Compute the last node's context ids once, as it is shared by all calls.
1766 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1767
1768 [[maybe_unused]] bool PrevIterCreatedNode = false;
1769 bool CreatedNode = false;
1770 for (unsigned I = 0; I < Calls.size();
1771 I++, PrevIterCreatedNode = CreatedNode) {
1772 CreatedNode = false;
1773 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1774
// Skip any that were marked as mergeable onto a prior matching call.
1776 if (SavedContextIds.empty()) {
1777
1778
1779
1780
// NOTE(review): the CallToMatchingCall.contains guard (orig. line 1781)
// preceding this continue was lost in extraction.
1782 continue;
1783 auto MatchingCall = CallToMatchingCall[Call];
1784 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1785
1786
1787
1788 assert(I > 0 && !PrevIterCreatedNode);
1789 continue;
1790 }
1791 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
// NOTE(review): the pushed argument (orig. line 1792, presumably `Call);`)
// was lost in extraction.
1793 continue;
1794 }
1795
1796 assert(LastId == Ids.back());
1797
1798
1799
1800
1801
1802
// Recompute the intersection with the last node's current ids, since
// earlier iterations may have consumed some.
1804 set_intersect(SavedContextIds, LastNodeContextIds);
1805 ContextNode *PrevNode = LastNode;
1806 bool Skip = false;
1807
1808
1809 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1810 auto Id = *IdIter;
1811 ContextNode *CurNode = getNodeForStackId(Id);
1812
1813
1815 assert(!CurNode->Recursive);
1816
1817 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1818 if (Edge) {
1819 Skip = true;
1820 break;
1821 }
1822 PrevNode = CurNode;
1823
1824
1825
// NOTE(review): the set_intersect with the edge's ids (orig. line 1826)
// was lost in extraction.
1827
1828
1829 if (SavedContextIds.empty()) {
1830 Skip = true;
1831 break;
1832 }
1833 }
1834 if (Skip)
1835 continue;
1836
// Create the new node for this call and wire it into the graph.
1838 ContextNode *NewNode = createNewNode(false, Func, Call);
1839 NonAllocationCallToContextNodeMap[Call] = NewNode;
1840 CreatedNode = true;
1841 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1842
1843 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1845
1846
1847
1848
// Connect the new node's callee side at the first stack id's node...
1849 connectNewNode(NewNode, FirstNode, true, SavedContextIds);
1850
1851
1852
1853
// ...and its caller side at the last stack id's node.
1854 connectNewNode(NewNode, LastNode, false, SavedContextIds);
1855
1856
1857
// Remove the saved ids from the interior edges of the replaced sequence.
1858 PrevNode = nullptr;
1859 for (auto Id : Ids) {
1860 ContextNode *CurNode = getNodeForStackId(Id);
1861
1863
1864
1865
1866 if (PrevNode) {
1867 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1868
1869
1870 if (!PrevEdge) {
1871 PrevNode = CurNode;
1872 continue;
1873 }
1874 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1875 if (PrevEdge->getContextIds().empty())
1876 removeEdgeFromGraph(PrevEdge);
1877 }
1878
1879
1880
1881 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1882 ? (uint8_t)AllocationType::None
1883 : CurNode->computeAllocType();
1884 PrevNode = CurNode;
1885 }
1886
1887 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1888
// NOTE(review): debug-only checkNode calls (orig. lines 1889-1895) appear
// partially lost below.
1891 for (auto Id : Ids) {
1892 ContextNode *CurNode = getNodeForStackId(Id);
1893
1896 }
1897 }
1898 }
1899}

1900

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers, and a long explanatory comment block (orig. lines 1916-1938)
// plus scattered statements were lost. Purpose (from surviving code): for
// each tracked "important" (large cold) context, re-walk its recorded stack
// ids through the nodes recorded in StackIdsToNode, re-inserting the context
// id on edges along the true path (adding edges where missing) and erasing
// it from other, unvisited caller edges so the context threads through a
// single path.
1901 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1902 void CallsiteContextGraph<DerivedCCG, FuncTy,
1903 CallTy>::fixupImportantContexts() {
1904 if (ImportantContextIdInfo.empty())
1905 return;
1906
1907
1908 NumImportantContextIds = ImportantContextIdInfo.size();
1909
// NOTE(review): the guard preceding this return (orig. line 1910) was lost.
1911 return;
1912
// NOTE(review): the dot-export guard (orig. line 1913) was lost.
1914 exportToDot("beforestackfixup");
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939 for (auto &[CurContextId, Info] : ImportantContextIdInfo) {
1940 if (Info.StackIdsToNode.empty())
1941 continue;
1943 ContextNode *PrevNode = nullptr;
1944 ContextNode *CurNode = nullptr;
1945 DenseSet<const ContextEdge *> VisitedEdges;
1946 ArrayRef<uint64_t> AllStackIds(Info.StackIds);
1947
1948
1949 for (unsigned I = 0; I < AllStackIds.size(); I++, PrevNode = CurNode) {
1950
1951
// Try the longest recorded stack-id subsequence starting at I first.
1952 auto Len = Info.MaxLength;
1953 auto LenToEnd = AllStackIds.size() - I;
1954 if (Len > LenToEnd)
1955 Len = LenToEnd;
1956 CurNode = nullptr;
1957
1958
1959 for (; Len > 0; Len--) {
1960
1961 auto CheckStackIds = AllStackIds.slice(I, Len);
1962 auto EntryIt = Info.StackIdsToNode.find(CheckStackIds);
1963 if (EntryIt == Info.StackIdsToNode.end())
1964 continue;
1965 CurNode = EntryIt->second;
1966
1967
// Skip past the matched subsequence.
1968 I += Len - 1;
1969 break;
1970 }
1971
1972
1973
1974
// No recorded node covers this position; give up on this context.
1975 if (!CurNode)
1976 break;
1977
1978
// Nothing to connect for the first matched node.
1979 if (!PrevNode)
1980 continue;
1981
1982 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1983 if (CurEdge) {
1984
1985 if (CurEdge->getContextIds().insert(CurContextId).second) {
1986 NumFixupEdgeIdsInserted++;
1988 }
1989 } else {
1990
// No edge between the consecutive path nodes; create one carrying just
// this context id.
1991 NumFixupEdgesAdded++;
1992 DenseSet<uint32_t> ContextIds({CurContextId});
1993 auto AllocType = computeAllocType(ContextIds);
// NOTE(review): make_shared's template argument (ContextEdge) was lost in
// extraction on the next line.
1994 auto NewEdge = std::make_shared(
1995 PrevNode, CurNode, AllocType, std::move(ContextIds));
1996 PrevNode->CallerEdges.push_back(NewEdge);
1997 CurNode->CalleeEdges.push_back(NewEdge);
1998
1999 CurEdge = NewEdge.get();
2001 }
2002 VisitedEdges.insert(CurEdge);
2003
2004
// Remove this context id from any other (unvisited) caller edges so it
// only follows the fixed-up path.
2005 for (auto &Edge : PrevNode->CallerEdges) {
2006
2007
2008 if (Edge.get() != CurEdge && !VisitedEdges.contains(Edge.get()))
2009 Edge->getContextIds().erase(CurContextId);
2010 }
2011 }
2013 NumFixedContexts++;
2014 }
2015}

2016

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; many comment lines and some statements were lost (e.g. the
// std::vector template argument at orig. line 2026, the stable_sort call
// header at 2077, asserts at 2093/2139, and several set_intersect calls).
// Purpose: match profiled callsites (with multi-frame inlined stack id
// sequences) to the existing per-stack-id context nodes, computing the
// context id subsets each call covers (duplicating ids when the same
// sequence appears in multiple functions), then performing the node
// assignment in allocation-rooted post-order and fixing up important
// contexts.
2017 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2018 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2019
2020
2021
2022
2023
2024
2025
// Map from the last (furthest from alloc) stack id of each call's context
// to the calls sharing it.
2026 DenseMap<uint64_t, std::vector> StackIdToMatchingCalls;
2027 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2028 for (auto &Call : CallsWithMetadata) {
2029
2030 if (AllocationCallToContextNodeMap.count(Call))
2031 continue;
2032 auto StackIdsWithContextNodes =
2033 getStackIdsWithContextNodesForCall(Call.call());
2034
2035
2036 if (StackIdsWithContextNodes.empty())
2037 continue;
2038
2039
2040 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2041 {Call.call(), StackIdsWithContextNodes, Func, {}});
2042 }
2043 }
2044
2045
2046
2047
2048
2049
2050
2051 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2052
2053
2054
// Records calls found to be duplicates of an earlier matching call.
2055 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2056 for (auto &It : StackIdToMatchingCalls) {
2057 auto &Calls = It.getSecond();
2058
// Single call with a single stack id is handled in the post-order pass.
2059 if (Calls.size() == 1) {
2060 auto &Ids = Calls[0].StackIds;
2061 if (Ids.size() == 1)
2062 continue;
2063 }
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
// Sort calls by decreasing stack id sequence length, breaking ties by the
// ids themselves then by original (per-function) order for determinism.
2074 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2075 for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
2076 FuncToIndex.insert({CallCtxInfo.Func, Idx});
// NOTE(review): the sort-call header (orig. line 2077, presumably
// `std::stable_sort(`) was lost in extraction.
2078 Calls,
2079 [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
2080 return A.StackIds.size() > B.StackIds.size() ||
2081 (A.StackIds.size() == B.StackIds.size() &&
2082 (A.StackIds < B.StackIds ||
2083 (A.StackIds == B.StackIds &&
2084 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
2085 });
2086
2087
2088
2089
2090 uint64_t LastId = It.getFirst();
2091 ContextNode *LastNode = getNodeForStackId(LastId);
2092
2094
// Skip recursive last nodes; handled conservatively elsewhere.
2095 if (LastNode->Recursive)
2096 continue;
2097
2098
2099
2100 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2102
2103#ifndef NDEBUG
2104
2105
2106
2107
2108 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2109#endif
2110
2111 for (unsigned I = 0; I < Calls.size(); I++) {
2112 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
2113 assert(SavedContextIds.empty());
2114 assert(LastId == Ids.back());
2115
2116#ifndef NDEBUG
2117
2118
2119 if (I > 0 && Ids != Calls[I - 1].StackIds)
2120 MatchingIdsFuncSet.clear();
2121#endif
2122
2123
2124
2125
// Start from the last node's ids and narrow while walking the sequence.
2127 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2128
2129 ContextNode *PrevNode = LastNode;
2130 ContextNode *CurNode = LastNode;
2131 bool Skip = false;
2132
2133
2134
2135 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2136 auto Id = *IdIter;
2137 CurNode = getNodeForStackId(Id);
2138
2140
2141 if (CurNode->Recursive) {
2142 Skip = true;
2143 break;
2144 }
2145
2146 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
2147
2148
2149
2150
2151
2152
2153
2154
2155
// An existing caller edge means the stack sequence is not contiguous in
// the graph (e.g. due to inlining differences); skip.
2156 if (Edge) {
2157 Skip = true;
2158 break;
2159 }
2160 PrevNode = CurNode;
2161
2162
2163
// NOTE(review): the set_intersect with the edge's context ids (orig.
// line 2164) was lost in extraction here.
2165
2166
2167 if (StackSequenceContextIds.empty()) {
2168 Skip = true;
2169 break;
2170 }
2171 }
2172 if (Skip)
2173 continue;
2174
2175
2176
2177
2178
2179
// If the call's own leaf frame differs, subtract ids flowing past the
// last node to its callers — those belong to longer sequences.
2180 if (Ids.back() != getLastStackId(Call)) {
2181 for (const auto &PE : LastNode->CallerEdges) {
2182 set_subtract(StackSequenceContextIds, PE->getContextIds());
2183 if (StackSequenceContextIds.empty())
2184 break;
2185 }
2186
2187 if (StackSequenceContextIds.empty())
2188 continue;
2189 }
2190
2191#ifndef NDEBUG
2192
2193
2194
2195
2196
// NOTE(review): the assert on MatchingIdsFuncSet insertion (orig. line
// 2197) was lost in extraction.
2198
2199 MatchingIdsFuncSet.insert(Func);
2200#endif
2201
2202
2203
2204
2205
// Detect duplicates: following calls with the same ids in the same
// function are merged; same ids in a different function require
// duplicating the context ids.
2206 bool DuplicateContextIds = false;
2207 for (unsigned J = I + 1; J < Calls.size(); J++) {
2208 auto &CallCtxInfo = Calls[J];
2209 auto &NextIds = CallCtxInfo.StackIds;
2210 if (NextIds != Ids)
2211 break;
2212 auto *NextFunc = CallCtxInfo.Func;
2213 if (NextFunc != Func) {
2214
2215
2216 DuplicateContextIds = true;
2217 break;
2218 }
2219 auto &NextCall = CallCtxInfo.Call;
2220 CallToMatchingCall[NextCall] = Call;
2221
2222 I = J;
2223 }
2224
2225
2226
2227
2228
2229
2230
2231 OldToNewContextIds.reserve(OldToNewContextIds.size() +
2232 StackSequenceContextIds.size());
2233 SavedContextIds =
2234 DuplicateContextIds
2235 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2236 : StackSequenceContextIds;
2237 assert(!SavedContextIds.empty());
2238
2239 if (!DuplicateContextIds) {
2240
2241
2242
2243 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2244 if (LastNodeContextIds.empty())
2245 break;
2246 }
2247 }
2248 }
2249
2250
2251 propagateDuplicateContextIds(OldToNewContextIds);
2252
// NOTE(review): the VerifyCCG guard (orig. line 2253) was lost here.
2254 check();
2255
2256
2257
2258
2259
2260
2261 DenseSet<const ContextNode *> Visited;
// NOTE(review): the ImportantContextIds declaration (orig. line 2262) was
// lost in extraction before this continuation line.
2263 ImportantContextIdInfo.keys());
2264 for (auto &Entry : AllocationCallToContextNodeMap)
2265 assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
2266 CallToMatchingCall, ImportantContextIds);
2267
2268 fixupImportantContexts();
2269
2271 check();
2272}

2273

2274uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {

2275 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(

2277 return CallsiteContext.back();

2278}

2279

2280uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {

2282 CallStack<CallsiteInfo, SmallVector::const_iterator>

2284

2285 return Index.getStackIdAtIndex(CallsiteContext.back());

2286}

2287

2289

2291

2292

2293 if (!CloneNo)

2294 return Base.str();

2296}

2297

2301

2302

2303

2304

2307 auto Pos = F.getName().find_last_of('.');

2309 unsigned CloneNo;

2310 bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);

2312 (void)Err;

2313 return CloneNo;

2314}

2315

// NOTE(review): the return-expression lines (orig. lines 2319-2320) were
// lost in the HTML extraction; only the trailing `.str();` survives. The
// label is presumably built with llvm::Twine from the calling function's
// name and the callsite — restore from upstream before compiling.
2316 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
2317 const Instruction *Call,
2318 unsigned CloneNo) const {
2321 .str();
2322}

2323

// NOTE(review): damaged by HTML extraction — the CallerName construction
// (orig. lines 2329-2330), the isa<AllocInfo*> check (orig. line 2331's
// condition head), and the callee-name lookup lines (2333, 2335) were lost.
// Purpose (from surviving code): build a graph label "caller -> alloc" for
// allocation calls, or "caller -> callee-clone-name" for callsites.
2324 std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
2325 const IndexCall &Call,
2326 unsigned CloneNo) const {
2327 auto VI = FSToVIMap.find(Func);
2328 assert(VI != FSToVIMap.end());
// NOTE(review): the allocation-call check guarding this return was lost.
2331 return CallerName + " -> alloc";
2332 else {
2334 return CallerName + " -> " +
// NOTE(review): the getMemProfFuncName call head (orig. line 2335) was lost.
2336 Callsite->Clones[CloneNo]);
2337 }
2338}

2339

2340std::vector<uint64_t>

2341ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(

2342 Instruction *Call) {

2343 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(

2345 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(

2346 CallsiteContext);

2347}

2348

2349std::vector<uint64_t>

2350IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {

2352 CallStack<CallsiteInfo, SmallVector::const_iterator>

2354 return getStackIdsWithContextNodes<CallsiteInfo,

2355 SmallVector::const_iterator>(

2356 CallsiteContext);

2357}

2358

2359template <typename DerivedCCG, typename FuncTy, typename CallTy>

2360template <class NodeT, class IteratorT>

2361std::vector<uint64_t>

2362CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(

2363 CallStack<NodeT, IteratorT> &CallsiteContext) {

2364 std::vector<uint64_t> StackIds;

2365 for (auto IdOrIndex : CallsiteContext) {

2366 auto StackId = getStackId(IdOrIndex);

2367 ContextNode *Node = getNodeForStackId(StackId);

2368 if (!Node)

2369 break;

2370 StackIds.push_back(StackId);

2371 }

2372 return StackIds;

2373}

2374

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; several lines are missing (the Module parameter at orig.
// line 2376, the instruction-kind guard at 2387, MIB metadata casts at
// 2397/2403-2409, the CallStack constructions at 2414-2416, and debug/dot
// guards). Purpose: build the callsite context graph over a Module's IR by
// scanning memprof/callsite metadata, adding alloc and stack nodes, then
// running the stack-node update, multi-target handling, and backedge
// marking, and finally stripping callsite metadata.
2375 ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2377 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2378 : Mod(M), OREGetter(OREGetter) {
2379
2380
2381
// Tracks the top-N cold contexts by total size, for "important" marking.
2382 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2383 for (auto &F : M) {
2384 std::vector CallsWithMetadata;
2385 for (auto &BB : F) {
2386 for (auto &I : BB) {
// NOTE(review): the call-instruction guard (orig. line 2387) was lost.
2388 continue;
2389 if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
2390 CallsWithMetadata.push_back(&I);
2391 auto *AllocNode = addAllocNode(&I, &F);
2392 auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
2395
// Add a node and edges for each MIB (allocation context).
2396 for (auto &MDOp : MemProfMD->operands()) {
2398 std::vector ContextSizeInfo;
2399
// Operands beyond the first two are (full stack id, total size) pairs.
2400 if (MIBMD->getNumOperands() > 2) {
2401 for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
2402 MDNode *ContextSizePair =
2407 ->getZExtValue();
2410 ->getZExtValue();
2411 ContextSizeInfo.push_back({FullStackId, TotalSize});
2412 }
2413 }
2417 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2418 AllocNode, StackContext, CallsiteContext,
2420 TotalSizeToContextIdTopNCold);
2421 }
2422
2423
// NOTE(review): the dot-export guard (orig. line 2424) was lost here.
2425 DotAllocContextIds = AllocNode->getContextIds();
2427
2428
// Memprof and callsite metadata on this call are no longer needed.
2429 I.setMetadata(LLVMContext::MD_memprof, nullptr);
2430 I.setMetadata(LLVMContext::MD_callsite, nullptr);
2431 }
2432
// For callsite metadata without memprof, just record the call.
2433 else if (I.getMetadata(LLVMContext::MD_callsite)) {
2434 CallsWithMetadata.push_back(&I);
2435 }
2436 }
2437 }
2438 if (!CallsWithMetadata.empty())
2439 FuncToCallsWithMetadata[&F] = CallsWithMetadata;
2440 }
2441
// NOTE(review): the debug-flag guard preceding this dump block was lost.
2443 dbgs() << "CCG before updating call stack chains:\n";
2444 dbgs() << *this;
2445 }
2446
2448 exportToDot("prestackupdate");
2449
2450 updateStackNodes();
2451
2453 exportToDot("poststackupdate");
2454
2455 handleCallsitesWithMultipleTargets();
2456
2457 markBackedges();
2458
2459
// Strip off remaining callsite metadata, no longer needed.
2460 for (auto &FuncEntry : FuncToCallsWithMetadata)
2461 for (auto &Call : FuncEntry.second)
2462 Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
2463}

2464

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; several lines are missing (constructor parameters at orig.
// lines 2466-2467, guards at 2484/2487, asserts, CallStack template
// arguments). CAUTION: the conditions at orig. lines 2491, 2537, and 2546
// read `FS->allocs().empty()` / `FS->callsites().empty()` yet guard loops
// over those same (non-empty) lists — upstream uses the negated forms
// (`!FS->allocs().empty()` etc.); the `!` characters appear to have been
// lost in extraction. Confirm against upstream before compiling.
2465 IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2468 isPrevailing)
2469 : Index(Index), isPrevailing(isPrevailing) {
2470
2471
2472
// Tracks the top-N cold contexts by total size, for "important" marking.
2473 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2474 for (auto &I : Index) {
2475 auto VI = Index.getValueInfo(I);
2476 for (auto &S : VI.getSummaryList()) {
2477
2478
2479
2480
2481
2482
2483
// NOTE(review): the first half of this condition (orig. line 2484) was lost.
2485 !isPrevailing(VI.getGUID(), S.get()))
2486 continue;
// NOTE(review): the FunctionSummary cast producing FS (orig. line 2487)
// was lost here.
2488 if (!FS)
2489 continue;
2490 std::vector CallsWithMetadata;
// NOTE(review): likely `!FS->allocs().empty()` upstream (see header note).
2491 if (FS->allocs().empty()) {
2492 for (auto &AN : FS->mutableAllocs()) {
2493
2494
2495
2496
2497 if (AN.MIBs.empty())
2498 continue;
2499 IndexCall AllocCall(&AN);
2500 CallsWithMetadata.push_back(AllocCall);
2501 auto *AllocNode = addAllocNode(AllocCall, FS);
2502
2503
2504
// NOTE(review): the empty-CallStack declaration head (orig. line 2505)
// was lost before this continuation line.
2506 EmptyContext;
2507 unsigned I = 0;
// NOTE(review): the assert head (orig. line 2508) was lost before this
// continuation line.
2509 AN.ContextSizeInfos.size() == AN.MIBs.size());
2510
// Add a node and edges for each MIB (allocation context).
2511 for (auto &MIB : AN.MIBs) {
2513 StackContext(&MIB);
2514 std::vector ContextSizeInfo;
2515 if (!AN.ContextSizeInfos.empty()) {
2516 for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I])
2517 ContextSizeInfo.push_back({FullStackId, TotalSize});
2518 }
2519 addStackNodesForMIB<MIBInfo, SmallVector::const_iterator>(
2520 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2521 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2522 I++;
2523 }
2524
2525
// NOTE(review): the dot-export guard (orig. line 2526) was lost here.
2527 DotAllocContextIds = AllocNode->getContextIds();
2529
2530
2531
2532
// Initialize version 0 (the original) with the type to use for this alloc.
2533 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2534 }
2535 }
2536
// NOTE(review): likely `!FS->callsites().empty()` upstream (see header note).
2537 if (FS->callsites().empty())
2538 for (auto &SN : FS->mutableCallsites()) {
2539 IndexCall StackNodeCall(&SN);
2540 CallsWithMetadata.push_back(StackNodeCall);
2541 }
2542
2543 if (!CallsWithMetadata.empty())
2544 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
2545
// NOTE(review): likely negated conditions upstream (see header note).
2546 if (FS->allocs().empty() || FS->callsites().empty())
2547 FSToVIMap[FS] = VI;
2548 }
2549 }
2550
// NOTE(review): the debug-flag guard preceding this dump block was lost.
2552 dbgs() << "CCG before updating call stack chains:\n";
2553 dbgs() << *this;
2554 }
2555
2557 exportToDot("prestackupdate");
2558
2559 updateStackNodes();
2560
2562 exportToDot("poststackupdate");
2563
2564 handleCallsitesWithMultipleTargets();
2565
2566 markBackedges();
2567}

2568

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; missing lines include the TailCallToContextNodeMap
// declaration (orig. line 2584), the MatchingCalls append (2600), ThisCall
// bindings (2619, 2663), and most comments. Purpose: for each non-allocation
// callsite node, find a recorded call whose IR callees match all profiled
// callee edges (possibly via tail-call chains), partitioning by callee when
// the matching calls target different functions, and clearing the call on
// nodes with no match.
2569 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2570 void CallsiteContextGraph<DerivedCCG, FuncTy,
2571 CallTy>::handleCallsitesWithMultipleTargets() {
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2585
// New callsite-to-node assignments created during partitioning.
2586 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2587 for (auto &Entry : NonAllocationCallToContextNodeMap) {
2588 auto *Node = Entry.second;
2590
2591
2592
2593
2594
2595
2596
// Gather this node's primary call plus all recorded matching calls.
2597 std::vector AllCalls;
2598 AllCalls.reserve(Node->MatchingCalls.size() + 1);
2599 AllCalls.push_back(Node->Call);
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613 if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
2614 continue;
2615
2616 auto It = AllCalls.begin();
// Iterate through the calls until we find the first that matches.
2618 for (; It != AllCalls.end(); ++It) {
// NOTE(review): the ThisCall binding (orig. line 2619) was lost here.
2620 bool Match = true;
2621 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
2622 ++EI) {
2623 auto Edge = *EI;
2624 if (!Edge->Callee->hasCall())
2625 continue;
2626 assert(NodeToCallingFunc.count(Edge->Callee));
2627
2628 if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
2629 Match = false;
2630 break;
2631 }
2632 }
2633
2634 if (Match) {
2635
// Promote the first fully-matching call to be the node's call.
2637 if (Node->Call != ThisCall) {
2638 Node->setCall(ThisCall);
2639
2640
2641
2643 }
2644 break;
2645 }
2646 }
2647
2648
2649 Node->MatchingCalls.clear();
2650
// No matching call found: clear the node's call so it is ignored later.
2652 if (It == AllCalls.end()) {
2653 RemovedEdgesWithMismatchedCallees++;
2654
2655
2656
2657 Node->setCall(CallInfo());
2658 continue;
2659 }
2660
// Rebuild MatchingCalls from the remaining calls with the same callee.
2662 for (++It; It != AllCalls.end(); ++It) {
// NOTE(review): the ThisCall binding (orig. line 2663) was lost here.
2664 if (!sameCallee(Node->Call.call(), ThisCall.call()))
2665 continue;
2666 Node->MatchingCalls.push_back(ThisCall);
2667 }
2668 }
2669
2670
2671
2672
2673
2674
// Drop map entries whose node lost its call or was reassigned.
2675 NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
2676 return !it.second->hasCall() || it.second->Call != it.first;
2677 });
2678
2679
2680 for (auto &[Call, Node] : NewCallToNode)
2681 NonAllocationCallToContextNodeMap[Call] = Node;
2682
2683
2684
2685 for (auto &[Call, Node] : TailCallToContextNodeMap)
2686 NonAllocationCallToContextNodeMap[Call] = Node;
2687}

2688

// NOTE(review): damaged by HTML extraction — numeric prefixes are doxygen
// line numbers; missing lines include the Node/AllCalls parameters (orig.
// line 2691), map declarations (2703, 2715), and most comments. CAUTION:
// `if (Info->Node) {` at orig. line 2767 guards code that ASSIGNS
// Info->Node, which reads inverted — upstream presumably has
// `if (!Info->Node) {` with the `!` lost in extraction; confirm. Purpose:
// when a callsite node's recorded calls target multiple distinct callees,
// split the node per callee function (plus an "unmatched" node), moving
// each profiled callee edge to the appropriate new caller node.
2689 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2690 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2692 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2693
2694
2695
// Group the matching calls by their IR callee function.
2696 struct CallsWithSameCallee {
2697 std::vector Calls;
2698 ContextNode *Node = nullptr;
2699 };
2700
2701
2702
// NOTE(review): the CalleeFuncToCallInfo map declaration (orig. line 2703)
// was lost here.
2704 for (auto ThisCall : AllCalls) {
2705 auto *F = getCalleeFunc(ThisCall.call());
2706 if (F)
2707 CalleeFuncToCallInfo[F].Calls.push_back(ThisCall);
2708 }
2709
2710
2711
2712
2713
2714
// NOTE(review): the CalleeNodeToCallInfo map declaration (orig. line 2715)
// was lost here.
2716 for (const auto &Edge : Node->CalleeEdges) {
2717 if (!Edge->Callee->hasCall())
2718 continue;
2719 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2720 if (CalleeFuncToCallInfo.contains(ProfiledCalleeFunc))
2721 CalleeNodeToCallInfo[Edge->Callee] =
2722 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2723 }
2724
2725
2726
// Nothing to partition if no profiled callee matched any call's callee.
2728 if (CalleeNodeToCallInfo.empty())
2729 return false;
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
// Node receiving edges whose callee matched none of the calls.
2740 ContextNode *UnmatchedCalleesNode = nullptr;
2741
// Track whether the original Node has been reused for one callee group.
2742 bool UsedOrigNode = false;
2744
2745
2746
// Copy since edges are moved to other nodes during iteration.
2747 auto CalleeEdges = Node->CalleeEdges;
2748 for (auto &Edge : CalleeEdges) {
2749 if (!Edge->Callee->hasCall())
2750 continue;
2751
2752
2753
2754 ContextNode *CallerNodeToUse = nullptr;
2755
2756
2757
// Edge's callee matched none of the calls: route it to the shared
// unmatched-callees node.
2758 if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
2759 if (!UnmatchedCalleesNode)
2760 UnmatchedCalleesNode =
2761 createNewNode(false, NodeToCallingFunc[Node]);
2762 CallerNodeToUse = UnmatchedCalleesNode;
2763 } else {
2764
2765
2766 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
// NOTE(review): likely `if (!Info->Node) {` upstream (see header note).
2767 if (Info->Node) {
2768
// First group reuses the original node; later groups get new nodes.
2769 if (!UsedOrigNode) {
// NOTE(review): the `Info->Node = Node;` assignment (orig. line 2770)
// was lost here.
2772 Node->MatchingCalls.clear();
2773 UsedOrigNode = true;
2774 } else
2775 Info->Node =
2776 createNewNode(false, NodeToCallingFunc[Node]);
2778
2779
// Assign the group's representative call (and record remaining
// matching calls — the appends at orig. lines 2781-2782 were lost).
2780 Info->Node->setCall(Info->Calls.front());
2783
2784
2785
2786 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2787 }
2788 CallerNodeToUse = Info->Node;
2789 }
2790
2791
2792 if (CallerNodeToUse == Node)
2793 continue;
2794
2795 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2796 }
2797
2798
2799
2800
// Clean up any caller edges left with AllocTypes == None after the moves.
2802 for (auto &I : CalleeNodeToCallInfo)
2803 removeNoneTypeCallerEdges(I.second->Node);
2804 if (UnmatchedCalleesNode)
2805 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2806 removeNoneTypeCallerEdges(Node);
2807
2808 return true;
2809}

2810

// Regular-LTO (Module) flavor: stack ids are stored directly in the
// metadata, so the mapping is the identity.
2811uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {

2812

// The value passed in is already the full stack id.
2813 return IdOrIndex;

2814}

2815

// ThinLTO (Index) flavor: summaries record an index into the combined
// index's stack id table rather than the id itself, so translate it back.
2816uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {

2817

2818

2819 return Index.getStackIdAtIndex(IdOrIndex);

2820}

2821

2822template <typename DerivedCCG, typename FuncTy, typename CallTy>

2823bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(

2824 CallTy Call, EdgeIter &EI,

2825 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {

2826 auto Edge = *EI;

2827 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];

2828 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];

2829

2830

2831 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;

2832 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,

2833 FoundCalleeChain))

2834 return false;

2835

2836

2837 if (FoundCalleeChain.empty())

2838 return true;

2839

2840 auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {

2841 auto *CurEdge = Callee->findEdgeFromCaller(Caller);

2842

2843

2844 if (CurEdge) {

2845 CurEdge->ContextIds.insert_range(Edge->ContextIds);

2846 CurEdge->AllocTypes |= Edge->AllocTypes;

2847 return;

2848 }

2849

2850

2851 auto NewEdge = std::make_shared(

2852 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);

2853 Callee->CallerEdges.push_back(NewEdge);

2854 if (Caller == Edge->Caller) {

2855

2856

2857

2858 EI = Caller->CalleeEdges.insert(EI, NewEdge);

2859 ++EI;

2861 "Iterator position not restored after insert and increment");

2862 } else

2863 Caller->CalleeEdges.push_back(NewEdge);

2864 };

2865

2866

2867

2868 auto *CurCalleeNode = Edge->Callee;

2869 for (auto &[NewCall, Func] : FoundCalleeChain) {

2870 ContextNode *NewNode = nullptr;

2871

2872 if (TailCallToContextNodeMap.count(NewCall)) {

2873 NewNode = TailCallToContextNodeMap[NewCall];

2874 NewNode->AllocTypes |= Edge->AllocTypes;

2875 } else {

2876 FuncToCallsWithMetadata[Func].push_back({NewCall});

2877

2878 NewNode = createNewNode(false, Func, NewCall);

2879 TailCallToContextNodeMap[NewCall] = NewNode;

2880 NewNode->AllocTypes = Edge->AllocTypes;

2881 }

2882

2883

2884 AddEdge(NewNode, CurCalleeNode);

2885

2886 CurCalleeNode = NewNode;

2887 }

2888

2889

2890 AddEdge(Edge->Caller, CurCalleeNode);

2891

2892#ifndef NDEBUG

2893

2895#endif

2896

2897

2898 removeEdgeFromGraph(Edge.get(), &EI, true);

2899

2900

2901

2902

2903

2905 --EI;

2906

2907 return true;

2908}

2909

2910bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(

2911 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,

2912 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,

2913 bool &FoundMultipleCalleeChains) {

2914

2915

2917 return false;

2918

2920 FoundCalleeChain.push_back({Callsite, F});

2921 };

2922

2924 if (!CalleeFunc) {

2929 }

2930

2931

2932

2933

2934

2935 bool FoundSingleCalleeChain = false;

2936 for (auto &BB : *CalleeFunc) {

2937 for (auto &I : BB) {

2939 if (!CB || !CB->isTailCall())

2940 continue;

2941 auto *CalledValue = CB->getCalledOperand();

2942 auto *CalledFunction = CB->getCalledFunction();

2943 if (CalledValue && !CalledFunction) {

2944 CalledValue = CalledValue->stripPointerCasts();

2945

2947 }

2948

2949

2951 assert(!CalledFunction &&

2952 "Expected null called function in callsite for alias");

2954 }

2955 if (!CalledFunction)

2956 continue;

2957 if (CalledFunction == ProfiledCallee) {

2958 if (FoundSingleCalleeChain) {

2959 FoundMultipleCalleeChains = true;

2960 return false;

2961 }

2962 FoundSingleCalleeChain = true;

2963 FoundProfiledCalleeCount++;

2964 FoundProfiledCalleeDepth += Depth;

2965 if (Depth > FoundProfiledCalleeMaxDepth)

2966 FoundProfiledCalleeMaxDepth = Depth;

2967 SaveCallsiteInfo(&I, CalleeFunc);

2968 } else if (findProfiledCalleeThroughTailCalls(

2969 ProfiledCallee, CalledFunction, Depth + 1,

2970 FoundCalleeChain, FoundMultipleCalleeChains)) {

2971

2972

2973 assert(!FoundMultipleCalleeChains);

2974 if (FoundSingleCalleeChain) {

2975 FoundMultipleCalleeChains = true;

2976 return false;

2977 }

2978 FoundSingleCalleeChain = true;

2979 SaveCallsiteInfo(&I, CalleeFunc);

2980 } else if (FoundMultipleCalleeChains)

2981 return false;

2982 }

2983 }

2984

2985 return FoundSingleCalleeChain;

2986}

2987

// Returns the direct callee Function of \p Call, or nullptr when the call
// has no called operand or is indirect. NOTE(review): the page extraction
// dropped several lines here (original lines 2989, 2993, 2995-2996) —
// presumably the cast of Call to CallBase, a dyn_cast of the stripped
// operand to GlobalAlias, and the return statements resolving the alias /
// casting to Function. Confirm against the upstream file before relying on
// the exact logic.
2988const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *Call) {

// Indirect calls (or calls with no called operand) have no statically
// known callee.
2990 if (!CB->getCalledOperand() || CB->isIndirectCall())

2991 return nullptr;

// Look through pointer casts on the called operand.
2992 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();

2994 if (Alias)

2997}

2998

2999bool ModuleCallsiteContextGraph::calleeMatchesFunc(

3000 Instruction *Call, const Function *Func, const Function *CallerFunc,

3001 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {

3003 if (!CB->getCalledOperand() || CB->isIndirectCall())

3004 return false;

3005 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();

3007 if (CalleeFunc == Func)

3008 return true;

3010 if (Alias && Alias->getAliasee() == Func)

3011 return true;

3012

3013

3014

3015

3016

3017

3018

3019

3020 unsigned Depth = 1;

3021 bool FoundMultipleCalleeChains = false;

3022 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,

3023 FoundCalleeChain,

3024 FoundMultipleCalleeChains)) {

3025 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "

3026 << Func->getName() << " from " << CallerFunc->getName()

3027 << " that actually called " << CalleeVal->getName()

3028 << (FoundMultipleCalleeChains

3029 ? " (found multiple possible chains)"

3030 : "")

3031 << "\n");

3032 if (FoundMultipleCalleeChains)

3033 FoundProfiledCalleeNonUniquelyCount++;

3034 return false;

3035 }

3036

3037 return true;

3038}

3039

// Returns true if the two call instructions resolve to the same direct
// callee. Either call being indirect (or lacking a called operand) makes
// them compare unequal. NOTE(review): extraction dropped original lines
// 3042, 3046-3047 and 3051 — presumably the casts of Call1/Call2 to
// CallBase and the dyn_casts of the stripped operands to Function that
// define CalleeFunc1/CalleeFunc2. Verify against the upstream file.
3040bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,

3041 Instruction *Call2) {

3043 if (!CB1->getCalledOperand() || CB1->isIndirectCall())

3044 return false;

// Look through pointer casts on the first call's operand.
3045 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();

3048 if (!CB2->getCalledOperand() || CB2->isIndirectCall())

3049 return false;

// Look through pointer casts on the second call's operand.
3050 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();

3052 return CalleeFunc1 == CalleeFunc2;

3053}

3054

3055bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(

3056 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,

3057 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,

3058 bool &FoundMultipleCalleeChains) {

3059

3060

3062 return false;

3063

3064 auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {

3065

3066

3067 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||

3068 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))

3069

3070

3071 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =

3072 std::make_unique(Callee, SmallVector());

3073 CallsiteInfo *NewCallsiteInfo =

3074 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();

3075 FoundCalleeChain.push_back({NewCallsiteInfo, FS});

3076 };

3077

3078

3079

3080

3081

3082 bool FoundSingleCalleeChain = false;

3085 !isPrevailing(CurCallee.getGUID(), S.get()))

3086 continue;

3088 if (!FS)

3089 continue;

3090 auto FSVI = CurCallee;

3092 if (AS)

3093 FSVI = AS->getAliaseeVI();

3094 for (auto &CallEdge : FS->calls()) {

3095 if (!CallEdge.second.hasTailCall())

3096 continue;

3097 if (CallEdge.first == ProfiledCallee) {

3098 if (FoundSingleCalleeChain) {

3099 FoundMultipleCalleeChains = true;

3100 return false;

3101 }

3102 FoundSingleCalleeChain = true;

3103 FoundProfiledCalleeCount++;

3104 FoundProfiledCalleeDepth += Depth;

3105 if (Depth > FoundProfiledCalleeMaxDepth)

3106 FoundProfiledCalleeMaxDepth = Depth;

3107 CreateAndSaveCallsiteInfo(CallEdge.first, FS);

3108

3109 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);

3110 FSToVIMap[FS] = FSVI;

3111 } else if (findProfiledCalleeThroughTailCalls(

3112 ProfiledCallee, CallEdge.first, Depth + 1,

3113 FoundCalleeChain, FoundMultipleCalleeChains)) {

3114

3115

3116 assert(!FoundMultipleCalleeChains);

3117 if (FoundSingleCalleeChain) {

3118 FoundMultipleCalleeChains = true;

3119 return false;

3120 }

3121 FoundSingleCalleeChain = true;

3122 CreateAndSaveCallsiteInfo(CallEdge.first, FS);

3123

3124 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);

3125 FSToVIMap[FS] = FSVI;

3126 } else if (FoundMultipleCalleeChains)

3127 return false;

3128 }

3129 }

3130

3131 return FoundSingleCalleeChain;

3132}

3133

// Index-flavor counterpart of getCalleeFunc: returns the callee's
// FunctionSummary, or nullptr when the callee ValueInfo has no summaries.
// NOTE(review): extraction dropped original lines 3136 and 3139 —
// presumably the extraction of the callee ValueInfo from the IndexCall and
// the final return of the (possibly alias-resolved) FunctionSummary.
3134const FunctionSummary *

3135IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {

3137 if (Callee.getSummaryList().empty())

3138 return nullptr;

3140}

3141

3142bool IndexCallsiteContextGraph::calleeMatchesFunc(

3143 IndexCall &Call, const FunctionSummary *Func,

3144 const FunctionSummary *CallerFunc,

3145 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {

3147

3148

3149 AliasSummary *Alias =

3150 Callee.getSummaryList().empty()

3151 ? nullptr

3153 assert(FSToVIMap.count(Func));

3154 auto FuncVI = FSToVIMap[Func];

3155 if (Callee == FuncVI ||

3156

3157

3158

3160 return true;

3161

3162

3163

3164

3165

3166

3167

3168

3169 unsigned Depth = 1;

3170 bool FoundMultipleCalleeChains = false;

3171 if (!findProfiledCalleeThroughTailCalls(

3172 FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {

3173 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI

3174 << " from " << FSToVIMap[CallerFunc]

3175 << " that actually called " << Callee

3176 << (FoundMultipleCalleeChains

3177 ? " (found multiple possible chains)"

3178 : "")

3179 << "\n");

3180 if (FoundMultipleCalleeChains)

3181 FoundProfiledCalleeNonUniquelyCount++;

3182 return false;

3183 }

3184

3185 return true;

3186}

3187

// Index-flavor sameCallee: two IndexCalls are considered the same callee
// when their callee ValueInfos compare equal. NOTE(review): extraction
// dropped original lines 3189-3190, which presumably define Callee1 and
// Callee2 from the two IndexCall arguments.
3188bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {

3191 return Callee1 == Callee2;

3192}

3193

// Debug helper: prints this node (followed by a newline) to dbgs().
// NOTE(review): extraction dropped original line 3197 — presumably the
// call to print(dbgs()) preceding the trailing newline.
3194template <typename DerivedCCG, typename FuncTy, typename CallTy>

3195void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()

3196 const {

3198 dbgs() << "\n";

3199}

3200

// Prints a human-readable dump of this context node: its address, call,
// matching calls, node id, context ids, callee/caller edges, and clone
// relationships.
3201template <typename DerivedCCG, typename FuncTy, typename CallTy>

3202void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(

3203 raw_ostream &OS) const {

3204 OS << "Node " << this << "\n";

3205 OS << "\t";

// Delegate the call description to the derived-graph-specific printer.
3206 printCall(OS);

3207 if (Recursive)

3208 OS << " (recursive)";

3209 OS << "\n";

// Other calls matched to this node (same callee, same context).
3210 if (!MatchingCalls.empty()) {

3211 OS << "\tMatchingCalls:\n";

3212 for (auto &MatchingCall : MatchingCalls) {

3213 OS << "\t";

3214 MatchingCall.print(OS);

3215 OS << "\n";

3216 }

3217 }

3218 OS << "\tNodeId: " << NodeId << "\n";

// NOTE(review): extraction dropped original line 3219 here — presumably
// a line printing the node's AllocTypes.
3220 OS << "\tContextIds:";

3221

// Sort the context ids so the output is deterministic.
3222 auto ContextIds = getContextIds();

3223 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());

3224 std::sort(SortedIds.begin(), SortedIds.end());

3225 for (auto Id : SortedIds)

3226 OS << " " << Id;

3227 OS << "\n";

3228 OS << "\tCalleeEdges:\n";

3229 for (auto &Edge : CalleeEdges)

3230 OS << "\t\t" << *Edge << " (Callee NodeId: " << Edge->Callee->NodeId

3231 << ")\n";

3232 OS << "\tCallerEdges:\n";

3233 for (auto &Edge : CallerEdges)

3234 OS << "\t\t" << *Edge << " (Caller NodeId: " << Edge->Caller->NodeId

3235 << ")\n";

// A node either lists its clones, or names the original it was cloned
// from (CloneOf) — never both.
3236 if (!Clones.empty()) {

3237 OS << "\tClones: ";

3238 bool First = true;

3239 for (auto *C : Clones) {

// NOTE(review): extraction dropped original lines 3240 and 3242 —
// presumably the "if (!First)" guard and "First = false;" update for
// the comma separator.
3241 OS << ", ";

3243 OS << C << " NodeId: " << C->NodeId;

3244 }

3245 OS << "\n";

3246 } else if (CloneOf) {

3247 OS << "\tClone of " << CloneOf << " NodeId: " << CloneOf->NodeId << "\n";

3248 }

3249}

3250

// Debug helper: prints this edge (followed by a newline) to dbgs().
// NOTE(review): extraction dropped original line 3254 — presumably the
// call to print(dbgs()).
3251template <typename DerivedCCG, typename FuncTy, typename CallTy>

3252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()

3253 const {

3255 dbgs() << "\n";

3256}

3257

// Prints this edge: its endpoints, whether it is a backedge ("(BE)"), and
// its sorted context ids.
3258template <typename DerivedCCG, typename FuncTy, typename CallTy>

3259void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(

3260 raw_ostream &OS) const {

3261 OS << "Edge from Callee " << Callee << " to Caller: " << Caller

3262 << (IsBackedge ? " (BE)" : "")

// NOTE(review): extraction dropped original line 3263 — presumably a
// stream of the edge's AllocTypes before the context ids.
3264 OS << " ContextIds:";

// Sort for deterministic output.
3265 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());

3266 std::sort(SortedIds.begin(), SortedIds.end());

3267 for (auto Id : SortedIds)

3268 OS << " " << Id;

3269}

3270

// Debug helper: dumps the whole graph to dbgs(). NOTE(review): extraction
// dropped original line 3273 — presumably the call to print(dbgs()).
3271template <typename DerivedCCG, typename FuncTy, typename CallTy>

3272void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {

3274}

3275

// Prints every live (non-removed) node of the graph, one per paragraph,
// preceded by a header line.
3276template <typename DerivedCCG, typename FuncTy, typename CallTy>

3277void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(

3278 raw_ostream &OS) const {

3279 OS << "Callsite Context Graph:\n";

3280 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;

// NOTE(review): extraction dropped original line 3281 — presumably the
// loop header iterating over the graph's nodes (via GraphTraits).
3282 if (Node->isRemoved())

3283 continue;

3284 Node->print(OS);

3285 OS << "\n";

3286 }

3287}

3288

3289template <typename DerivedCCG, typename FuncTy, typename CallTy>

3290void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(

3291 raw_ostream &OS) const {

3292 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;

3294 if (Node->isRemoved())

3295 continue;

3296 if (Node->IsAllocation)

3297 continue;

3298 DenseSet<uint32_t> ContextIds = Node->getContextIds();

3299 auto AllocTypeFromCall = getAllocationCallType(Node->Call);

3300 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());

3301 std::sort(SortedIds.begin(), SortedIds.end());

3302 for (auto Id : SortedIds) {

3303 auto TypeI = ContextIdToAllocationType.find(Id);

3304 assert(TypeI != ContextIdToAllocationType.end());

3305 auto CSI = ContextIdToContextSizeInfos.find(Id);

3306 if (CSI != ContextIdToContextSizeInfos.end()) {

3307 for (auto &Info : CSI->second) {

3308 OS << "MemProf hinting: "

3310 << " full allocation context " << Info.FullStackId

3311 << " with total size " << Info.TotalSize << " is "

3313 if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)

3315 << " due to cold byte percent";

3316

3317 OS << " (context id " << Id << ")";

3318 OS << "\n";

3319 }

3320 }

3321 }

3322 }

3323}

3324

3325template <typename DerivedCCG, typename FuncTy, typename CallTy>

3326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {

3327 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;

3330 for (auto &Edge : Node->CallerEdges)

3332 }

3333}

3334

3335template <typename DerivedCCG, typename FuncTy, typename CallTy>

3336struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {

3337 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;

3338 using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;

3339

3340 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;

3342

3346

3350

3354

3356 return G->NodeOwner.begin()->get();

3357 }

3358

3359 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;

3360 static const ContextNode<DerivedCCG, FuncTy, CallTy> *

3362 return P->Callee;

3363 }

3364

3366 mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<

3369

3373

3377};

3378

3379template <typename DerivedCCG, typename FuncTy, typename CallTy>

3383

3384

3385

3386

3387 DoHighlight =

3391 }

3392

3393 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;

3397

3399 std::string LabelString =

3400 (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +

3401 Twine(Node->OrigStackOrAllocId) + " NodeId: " + Twine(Node->NodeId))

3402 .str();

3403 LabelString += "\n";

3404 if (Node->hasCall()) {

3405 auto Func = G->NodeToCallingFunc.find(Node);

3406 assert(Func != G->NodeToCallingFunc.end());

3407 LabelString +=

3408 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());

3409 } else {

3410 LabelString += "null call";

3411 if (Node->Recursive)

3412 LabelString += " (recursive)";

3413 else

3414 LabelString += " (external)";

3415 }

3416 return LabelString;

3417 }

3418

3420 auto ContextIds = Node->getContextIds();

3421

3422

3423

3424 bool Highlight = false;

3425 if (DoHighlight) {

3430 else

3431 Highlight = set_intersects(ContextIds, G->DotAllocContextIds);

3432 }

3433 std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +

3434 getContextIds(ContextIds) + "\"")

3435 .str();

3436

3437 if (Highlight)

3438 AttributeString += ",fontsize=\"30\"";

3439 AttributeString +=

3440 (Twine(",fillcolor=\"") + getColor(Node->AllocTypes, Highlight) + "\"")

3441 .str();

3442 if (Node->CloneOf) {

3443 AttributeString += ",color=\"blue\"";

3444 AttributeString += ",style=\"filled,bold,dashed\"";

3445 } else

3446 AttributeString += ",style=\"filled\"";

3447 return AttributeString;

3448 }

3449

3452 auto &Edge = *(ChildIter.getCurrent());

3453

3454

3455

3456

3457 bool Highlight = false;

3458 if (DoHighlight) {

3463 else

3464 Highlight = set_intersects(Edge->ContextIds, G->DotAllocContextIds);

3465 }

3466 auto Color = getColor(Edge->AllocTypes, Highlight);

3467 std::string AttributeString =

3468 (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +

3469

3470 Twine(",fillcolor=\"") + Color + "\"" + Twine(",color=\"") + Color +

3471 "\"")

3472 .str();

3473 if (Edge->IsBackedge)

3474 AttributeString += ",style=\"dotted\"";

3475

3476 if (Highlight)

3477 AttributeString += ",penwidth=\"2.0\",weight=\"2\"";

3478 return AttributeString;

3479 }

3480

3481

3482

3484 if (Node->isRemoved())

3485 return true;

3486

3487

3492 return false;

3493 }

3494

3495private:

3496 static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {

3497 std::string IdString = "ContextIds:";

3498 if (ContextIds.size() < 100) {

3499 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());

3500 std::sort(SortedIds.begin(), SortedIds.end());

3501 for (auto Id : SortedIds)

3502 IdString += (" " + Twine(Id)).str();

3503 } else {

3504 IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();

3505 }

3506 return IdString;

3507 }

3508

3509 static std::string getColor(uint8_t AllocTypes, bool Highlight) {

3510

3511

3512

3513

3514

3515 if (AllocTypes == (uint8_t)AllocationType::NotCold)

3516

3517 return DoHighlight || Highlight ? "brown1" : "lightpink";

3518 if (AllocTypes == (uint8_t)AllocationType::Cold)

3519 return DoHighlight || Highlight ? "cyan" : "lightskyblue";

3520 if (AllocTypes ==

3521 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))

3522 return Highlight ? "magenta" : "mediumorchid1";

3523 return "gray";

3524 }

3525

3526 static std::string getNodeId(NodeRef Node) {

3527 std::stringstream SStream;

3528 SStream << std::hex << "N0x" << (unsigned long long)Node;

3529 std::string Result = SStream.str();

3531 }

3532

3533

3534

3536};

3537

3538template <typename DerivedCCG, typename FuncTy, typename CallTy>

3539bool DOTGraphTraits<

3540 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>::DoHighlight =

3541 false;

3542

3543template <typename DerivedCCG, typename FuncTy, typename CallTy>

3544void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(

3545 std::string Label) const {

3548}

3549

// Clones the callee of \p Edge into a brand-new node and redirects the
// edge (restricted to \p ContextIdsToMove, or all of the edge's context
// ids when empty — see moveEdgeToExistingCalleeClone) to that clone.
// Returns the new clone. NOTE(review): extraction ate the template
// argument on the shared_ptr parameter below (original line 3553) —
// presumably std::shared_ptr<ContextEdge>.
3550template <typename DerivedCCG, typename FuncTy, typename CallTy>

3551typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *

3552CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(

3553 const std::shared_ptr &Edge,

3554 DenseSet<uint32_t> ContextIdsToMove) {

3555 ContextNode *Node = Edge->Callee;

// Every node must have a recorded containing function.
3556 assert(NodeToCallingFunc.count(Node));

// The clone shares the original's allocation-ness, function, and call.
3557 ContextNode *Clone =

3558 createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);

3559 Node->addClone(Clone);

// The clone also inherits the original's matched calls.
3560 Clone->MatchingCalls = Node->MatchingCalls;

// Delegate the actual edge rewiring; "true" marks NewCallee as a fresh
// clone (see moveEdgeToExistingCalleeClone's NewClone parameter).
3561 moveEdgeToExistingCalleeClone(Edge, Clone, true,

3562 ContextIdsToMove);

3563 return Clone;

3564}

3565

3566template <typename DerivedCCG, typename FuncTy, typename CallTy>

3567void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::

3568 moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge,

3569 ContextNode *NewCallee, bool NewClone,

3570 DenseSet<uint32_t> ContextIdsToMove) {

3571

3572

3573 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());

3574

3575 bool EdgeIsRecursive = Edge->Callee == Edge->Caller;

3576

3577 ContextNode *OldCallee = Edge->Callee;

3578

3579

3580

3581 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);

3582

3583

3584

3585 if (ContextIdsToMove.empty())

3586 ContextIdsToMove = Edge->getContextIds();

3587

3588

3589

3590 if (Edge->getContextIds().size() == ContextIdsToMove.size()) {

3591

3592

3593 NewCallee->AllocTypes |= Edge->AllocTypes;

3594

3595 if (ExistingEdgeToNewCallee) {

3596

3597

3598 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);

3599 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;

3600 assert(Edge->ContextIds == ContextIdsToMove);

3601 removeEdgeFromGraph(Edge.get());

3602 } else {

3603

3604 Edge->Callee = NewCallee;

3605 NewCallee->CallerEdges.push_back(Edge);

3606

3607 OldCallee->eraseCallerEdge(Edge.get());

3608

3609

3610 }

3611 } else {

3612

3613

3614 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);

3615 if (ExistingEdgeToNewCallee) {

3616

3617

3618 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);

3619 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;

3620 } else {

3621

3622 auto NewEdge = std::make_shared(

3623 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);

3624 Edge->Caller->CalleeEdges.push_back(NewEdge);

3625 NewCallee->CallerEdges.push_back(NewEdge);

3626 }

3627

3628

3629 NewCallee->AllocTypes |= CallerEdgeAllocType;

3631 Edge->AllocTypes = computeAllocType(Edge->ContextIds);

3632 }

3633

3634

3635

3636 for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {

3637 ContextNode *CalleeToUse = OldCalleeEdge->Callee;

3638

3639

3640

3641 if (CalleeToUse == OldCallee) {

3642

3643

3644

3645 if (EdgeIsRecursive) {

3647 continue;

3648 }

3649 CalleeToUse = NewCallee;

3650 }

3651

3652

3653 DenseSet<uint32_t> EdgeContextIdsToMove =

3654 set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);

3655 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);

3656 OldCalleeEdge->AllocTypes =

3657 computeAllocType(OldCalleeEdge->getContextIds());

3658 if (!NewClone) {

3659

3660

3661

3662

3663

3664 if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {

3665 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);

3666 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);

3667 continue;

3668 }

3669 }

3670 auto NewEdge = std::make_shared(

3671 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),

3672 EdgeContextIdsToMove);

3673 NewCallee->CalleeEdges.push_back(NewEdge);

3674 NewEdge->Callee->CallerEdges.push_back(NewEdge);

3675 }

3676

3677

3678 OldCallee->AllocTypes = OldCallee->computeAllocType();

3679

3680 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==

3681 OldCallee->emptyContextIds());

3685 for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)

3687 false);

3688 for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)

3690 false);

3691 }

3692}

3693

3694template <typename DerivedCCG, typename FuncTy, typename CallTy>

3695void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::

3696 moveCalleeEdgeToNewCaller(const std::shared_ptr &Edge,

3697 ContextNode *NewCaller) {

3698 auto *OldCallee = Edge->Callee;

3699 auto *NewCallee = OldCallee;

3700

3701

3702 bool Recursive = Edge->Caller == Edge->Callee;

3703 if (Recursive)

3704 NewCallee = NewCaller;

3705

3706 ContextNode *OldCaller = Edge->Caller;

3707 OldCaller->eraseCalleeEdge(Edge.get());

3708

3709

3710

3711 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);

3712

3713 if (ExistingEdgeToNewCaller) {

3714

3715

3716 ExistingEdgeToNewCaller->getContextIds().insert_range(

3717 Edge->getContextIds());

3718 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;

3719 Edge->ContextIds.clear();

3720 Edge->AllocTypes = (uint8_t)AllocationType::None;

3721 OldCallee->eraseCallerEdge(Edge.get());

3722 } else {

3723

3724 Edge->Caller = NewCaller;

3725 NewCaller->CalleeEdges.push_back(Edge);

3726 if (Recursive) {

3727 assert(NewCallee == NewCaller);

3728

3729

3730 Edge->Callee = NewCallee;

3731 NewCallee->CallerEdges.push_back(Edge);

3732 OldCallee->eraseCallerEdge(Edge.get());

3733 }

3734

3735

3736 }

3737

3738 NewCaller->AllocTypes |= Edge->AllocTypes;

3739

3740

3741

3742

3743

3744#ifndef NDEBUG

3745 bool IsNewNode = NewCaller->CallerEdges.empty();

3746#endif

3747

3748

3749

3750

3751

3752

3753 if (!Recursive) {

3754 for (auto &OldCallerEdge : OldCaller->CallerEdges) {

3755 auto OldCallerCaller = OldCallerEdge->Caller;

3756

3757

3759 OldCallerEdge->getContextIds(), Edge->getContextIds());

3760 if (OldCaller == OldCallerCaller) {

3761 OldCallerCaller = NewCaller;

3762

3763

3764

3765 continue;

3766 }

3767 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);

3768 OldCallerEdge->AllocTypes =

3769 computeAllocType(OldCallerEdge->getContextIds());

3770

3771

3772

3773

3774 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);

3775

3776

3778 if (ExistingCallerEdge) {

3779 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);

3780 ExistingCallerEdge->AllocTypes |=

3781 computeAllocType(EdgeContextIdsToMove);

3782 continue;

3783 }

3784 auto NewEdge = std::make_shared(

3785 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),

3786 EdgeContextIdsToMove);

3787 NewCaller->CallerEdges.push_back(NewEdge);

3788 NewEdge->Caller->CalleeEdges.push_back(NewEdge);

3789 }

3790 }

3791

3792

3793 OldCaller->AllocTypes = OldCaller->computeAllocType();

3794

3795 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==

3796 OldCaller->emptyContextIds());

3800 for (const auto &OldCallerEdge : OldCaller->CallerEdges)

3802 false);

3803 for (const auto &NewCallerEdge : NewCaller->CallerEdges)

3805 false);

3806 }

3807}

3808

// Walks up from \p Node through its clones and callers, removing callee
// edges whose alloc type has become None. \p Visited guards against
// revisiting nodes. NOTE(review): extraction dropped original lines
// 3813-3814 — presumably the Visited.insert(Node) check guarding the
// early return — and line 3828 (likely an assert on removed edges).
3809template <typename DerivedCCG, typename FuncTy, typename CallTy>

3810void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::

3811 recursivelyRemoveNoneTypeCalleeEdges(

3812 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {

3815 return;

3816

// Drop this node's None-typed callee edges first.
3817 removeNoneTypeCalleeEdges(Node);

3818

// Process the node's clones as well.
3819 for (auto *Clone : Node->Clones)

3820 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);

3821

3822

// Iterate over a copy since the recursion may mutate the caller edge
// list of this node.
3824 auto CallerEdges = Node->CallerEdges;

3825 for (auto &Edge : CallerEdges) {

// Skip edges removed by an earlier iteration / recursive call.
3827 if (Edge->isRemoved()) {

3829 continue;

3830 }

3831 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);

3832 }

3833}

3834

3835

// Driver for backedge marking: performs a DFS from each non-allocation
// callsite node, flagging callee edges that close a cycle (IsBackedge) so
// later cloning can defer them. NOTE(review): extraction dropped original
// lines 3838-3840 (likely an option-guard comment/early-return condition),
// 3845 (presumably "auto *Node = Entry.second;"), and 3852 (likely an
// assert that CurrentStack is empty after each DFS).
3836template <typename DerivedCCG, typename FuncTy, typename CallTy>

3837void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {

3841 return;

// Visited persists across roots; CurrentStack tracks the active DFS path.
3842 DenseSet<const ContextNode *> Visited;

3843 DenseSet<const ContextNode *> CurrentStack;

3844 for (auto &Entry : NonAllocationCallToContextNodeMap) {

3846 if (Node->isRemoved())

3847 continue;

// Only start DFS from roots (nodes with no callers).
3849 if (Node->CallerEdges.empty())

3850 continue;

3851 markBackedges(Node, Visited, CurrentStack);

3853 }

3854}

3855

3856

// Recursive DFS helper: a callee edge whose target is on the current DFS
// stack closes a cycle and is marked as a backedge. Already-visited nodes
// not on the stack are skipped (cross/forward edges). NOTE(review):
// extraction dropped original lines 3862-3863 — presumably an assert that
// the Visited insertion succeeded (each node is entered exactly once).
3857template <typename DerivedCCG, typename FuncTy, typename CallTy>

3858void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(

3859 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

3860 DenseSet<const ContextNode *> &CurrentStack) {

3861 auto I = Visited.insert(Node);

3864 (void)I;

3865 for (auto &CalleeEdge : Node->CalleeEdges) {

3866 auto *Callee = CalleeEdge->Callee;

3867 if (Visited.count(Callee)) {

// Visited and still on the active path => this edge closes a cycle.
3870 if (CurrentStack.count(Callee))

3871 CalleeEdge->IsBackedge = true;

3872 continue;

3873 }

// Push, recurse, pop: CurrentStack mirrors the active DFS path.
3874 CurrentStack.insert(Callee);

3875 markBackedges(Callee, Visited, CurrentStack);

3876 CurrentStack.erase(Callee);

3877 }

3878}

3879

// Driver for clone identification: runs the recursive identifyClones from
// every allocation node (with a fresh Visited set per allocation, seeded
// with that allocation's context ids), then prunes None-typed callee edges
// left behind by the cloning. NOTE(review): extraction dropped original
// line 3890 — presumably a verification-option guard (e.g. "if
// (VerifyCCG)") around the final check() call.
3880template <typename DerivedCCG, typename FuncTy, typename CallTy>

3881void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {

3882 DenseSet<const ContextNode *> Visited;

3883 for (auto &Entry : AllocationCallToContextNodeMap) {

// Each allocation gets an independent traversal.
3884 Visited.clear();

3885 identifyClones(Entry.second, Visited, Entry.second->getContextIds());

3886 }

3887 Visited.clear();

// Second pass: clean up edges whose alloc type dropped to None.
3888 for (auto &Entry : AllocationCallToContextNodeMap)

3889 recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);

3891 check();

3892}

3893

3894

3901

3902template <typename DerivedCCG, typename FuncTy, typename CallTy>

3903void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(

3904 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

3905 const DenseSet<uint32_t> &AllocContextIds) {

3909

3910

3911

3912

3913

3914

3915 if (Node->hasCall())

3916 return;

3917

3918

3920 return;

3921

3922#ifndef NDEBUG

3924#endif

3925 Visited.insert(Node);

3926

3928

3929

3930

3931

3932

3933 {

3934 auto CallerEdges = Node->CallerEdges;

3935 for (auto &Edge : CallerEdges) {

3936

3937 if (Edge->isRemoved()) {

3939 continue;

3940 }

3941

3942

3943 if (Edge->IsBackedge) {

3944

3945

3947 continue;

3948 }

3949

3950 if (!Visited.count(Edge->Caller) && Edge->Caller->CloneOf) {

3951 identifyClones(Edge->Caller, Visited, AllocContextIds);

3952 }

3953 }

3954 }

3955

3956

3958 return;

3959

3960

3961

3962

3963

3964

3965

3966

3967

3968

3969

3970

3971

3972

3973

3974 const unsigned AllocTypeCloningPriority[] = { 3, 4,

3975 1,

3976 2};

3978 [&](const std::shared_ptr &A,

3979 const std::shared_ptr &B) {

3980

3981

3982 if (A->ContextIds.empty())

3983

3984

3985

3986

3987 return false;

3988 if (B->ContextIds.empty())

3989 return true;

3990

3991 if (A->AllocTypes == B->AllocTypes)

3992

3993

3994 return *A->ContextIds.begin() < *B->ContextIds.begin();

3995 return AllocTypeCloningPriority[A->AllocTypes] <

3996 AllocTypeCloningPriority[B->AllocTypes];

3997 });

3998

3999 assert(Node->AllocTypes != (uint8_t)AllocationType::None);

4000

4001 DenseSet<uint32_t> RecursiveContextIds;

4003

4004

4006 DenseSet<uint32_t> AllCallerContextIds;

4007 for (auto &CE : Node->CallerEdges) {

4008

4009

4010 AllCallerContextIds.reserve(CE->getContextIds().size());

4011 for (auto Id : CE->getContextIds())

4012 if (!AllCallerContextIds.insert(Id).second)

4013 RecursiveContextIds.insert(Id);

4014 }

4015 }

4016

4017

4018

4019

4020

4021

4022

4023 auto CallerEdges = Node->CallerEdges;

4024 for (auto &CallerEdge : CallerEdges) {

4025

4026 if (CallerEdge->isRemoved()) {

4028 continue;

4029 }

4030 assert(CallerEdge->Callee == Node);

4031

4032

4033

4035 break;

4036

4037

4038

4039 if (!CallerEdge->Caller->hasCall())

4040 continue;

4041

4042

4043

4044 auto CallerEdgeContextsForAlloc =

4045 set_intersection(CallerEdge->getContextIds(), AllocContextIds);

4046 if (!RecursiveContextIds.empty())

4047 CallerEdgeContextsForAlloc =

4048 set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);

4049 if (CallerEdgeContextsForAlloc.empty())

4050 continue;

4051

4052 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);

4053

4054

4055

4056 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;

4057 CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());

4058 for (auto &CalleeEdge : Node->CalleeEdges)

4059 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(

4060 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));

4061

4062

4063

4064

4065

4066

4067

4068

4069

4070

4071

4072

4073

4074

4075

4076 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);

4077 assert(Node->AllocTypes != (uint8_t)AllocationType::None);

4078 if (!CallerEdge->IsBackedge &&

4079 allocTypeToUse(CallerAllocTypeForAlloc) ==

4080 allocTypeToUse(Node->AllocTypes) &&

4081 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(

4082 CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {

4083 continue;

4084 }

4085

4086 if (CallerEdge->IsBackedge) {

4087

4088

4090 DeferredBackedges++;

4091 }

4092

4093

4094

4095

4096

4097

4098

4099

4100

4101

4102

4103 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&

4104 !Visited.count(CallerEdge->Caller)) {

4105 const auto OrigIdCount = CallerEdge->getContextIds().size();

4106

4107

4108 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);

4109 removeNoneTypeCalleeEdges(CallerEdge->Caller);

4110

4111

4112

4113 bool UpdatedEdge = false;

4114 if (OrigIdCount > CallerEdge->getContextIds().size()) {

4115 for (auto E : Node->CallerEdges) {

4116

4117 if (E->Caller->CloneOf != CallerEdge->Caller)

4118 continue;

4119

4120

4121 auto CallerEdgeContextsForAllocNew =

4122 set_intersection(CallerEdgeContextsForAlloc, E->getContextIds());

4123 if (CallerEdgeContextsForAllocNew.empty())

4124 continue;

4125

4126

4127

4129 continue;

4130

4131

4132

4133 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);

4134 CallerEdge = E;

4135 UpdatedEdge = true;

4136 break;

4137 }

4138 }

4139

4140

4141

4142

4143 if (CallerEdge->isRemoved())

4144 continue;

4145

4146

4147

4148

4149

4150

4151 if (!UpdatedEdge) {

4153 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());

4154 if (CallerEdgeContextsForAlloc.empty())

4155 continue;

4156 }

4157

4158

4159 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);

4160 CalleeEdgeAllocTypesForCallerEdge.clear();

4161 for (auto &CalleeEdge : Node->CalleeEdges) {

4162 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(

4163 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));

4164 }

4165 }

4166

4167

4168

4169 ContextNode *Clone = nullptr;

4170 for (auto *CurClone : Node->Clones) {

4171 if (allocTypeToUse(CurClone->AllocTypes) !=

4172 allocTypeToUse(CallerAllocTypeForAlloc))

4173 continue;

4174

4177

4178

4179 assert(!BothSingleAlloc ||

4180 CurClone->AllocTypes == CallerAllocTypeForAlloc);

4181

4182

4183

4184

4185

4186 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(

4187 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {

4188 Clone = CurClone;

4189 break;

4190 }

4191 }

4192

4193

4194 if (Clone)

4195 moveEdgeToExistingCalleeClone(CallerEdge, Clone, false,

4196 CallerEdgeContextsForAlloc);

4197 else

4198 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);

4199

4200

4201 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);

4202 }

4203

4204

4206

4207

4208 assert(Node->AllocTypes != (uint8_t)AllocationType::None);

4209

4212}

4213

4214void ModuleCallsiteContextGraph::updateAllocationCall(

4219 "memprof", AllocTypeString);

4222 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())

4223 << ore::NV("AllocationCall", Call.call()) << " in clone "

4225 << " marked with memprof allocation attribute "

4226 << ore::NV("Attribute", AllocTypeString));

4227}

4228

4229void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,

4233 assert(AI->Versions.size() > Call.cloneNo());

4234 AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;

4235}

4236

4238ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {

4240 if (!CB->getAttributes().hasFnAttr("memprof"))

4241 return AllocationType::None;

4242 return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"

4243 ? AllocationType::Cold

4244 : AllocationType::NotCold;

4245}

4246

4248IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {

4250 assert(AI->Versions.size() > Call.cloneNo());

4252}

4253

4254void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,

4255 FuncInfo CalleeFunc) {

4256 auto *CurF = getCalleeFunc(CallerCall.call());

4257 auto NewCalleeCloneNo = CalleeFunc.cloneNo();

4259

4260

4261

4262

4264 if (CurCalleeCloneNo != NewCalleeCloneNo) {

4265 LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "

4266 << CurCalleeCloneNo << " now " << NewCalleeCloneNo

4267 << "\n");

4268 MismatchedCloneAssignments++;

4269 }

4270 }

4271 if (NewCalleeCloneNo > 0)

4272 cast(CallerCall.call())->setCalledFunction(CalleeFunc.func());

4273 OREGetter(CallerCall.call()->getFunction())

4274 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())

4275 << ore::NV("Call", CallerCall.call()) << " in clone "

4276 << ore::NV("Caller", CallerCall.call()->getFunction())

4277 << " assigned to call function clone "

4278 << ore::NV("Callee", CalleeFunc.func()));

4279}

4280

4281void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,

4282 FuncInfo CalleeFunc) {

4285 "Caller cannot be an allocation which should not have profiled calls");

4286 assert(CI->Clones.size() > CallerCall.cloneNo());

4287 auto NewCalleeCloneNo = CalleeFunc.cloneNo();

4288 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];

4289

4290

4291

4292

4293 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {

4294 LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "

4295 << CurCalleeCloneNo << " now " << NewCalleeCloneNo

4296 << "\n");

4297 MismatchedCloneAssignments++;

4298 }

4299 CurCalleeCloneNo = NewCalleeCloneNo;

4300}

4301

4302

4303

4304

4308 if (!SP)

4309 return;

4311 SP->replaceLinkageName(MDName);

4313 if (!Decl)

4314 return;

4315 TempDISubprogram NewDecl = Decl->clone();

4316 NewDecl->replaceLinkageName(MDName);

4318}

4319

4320CallsiteContextGraph<ModuleCallsiteContextGraph, Function,

4322ModuleCallsiteContextGraph::cloneFunctionForCallsite(

4323 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,

4324 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {

4325

4329 assert(Func.func()->getParent()->getFunction(Name));

4330 NewFunc->setName(Name);

4332 for (auto &Inst : CallsWithMetadataInFunc) {

4333

4334 assert(Inst.cloneNo() == 0);

4335 CallMap[Inst] = {cast(VMap[Inst.call()]), CloneNo};

4336 }

4337 OREGetter(Func.func())

4338 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())

4339 << "created clone " << ore::NV("NewFunction", NewFunc));

4340 return {NewFunc, CloneNo};

4341}

4342

4343CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,

4344 IndexCall>::FuncInfo

4345IndexCallsiteContextGraph::cloneFunctionForCallsite(

4346 FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,

4347 std::vector &CallsWithMetadataInFunc, unsigned CloneNo) {

4348

4349

4350

4351

4355

4356

4357

4358

4359

4360

4361 for (auto &Inst : CallsWithMetadataInFunc) {

4362

4363 assert(Inst.cloneNo() == 0);

4365 assert(AI->Versions.size() == CloneNo);

4366

4367

4368 AI->Versions.push_back(0);

4369 } else {

4371 assert(CI && CI->Clones.size() == CloneNo);

4372

4373

4374 CI->Clones.push_back(0);

4375 }

4376 CallMap[Inst] = {Inst.call(), CloneNo};

4377 }

4378 return {Func.func(), CloneNo};

4379}

4380

4381

4382

4383

4384

4385

4386

4387

4388

4389

4390

4391

4392

4393

4394

4395template <typename DerivedCCG, typename FuncTy, typename CallTy>

4396void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {

4398 return;

4399

4400

4401

4402 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;

4403 for (auto &Entry : AllocationCallToContextNodeMap) {

4405 for (auto Id : Node->getContextIds())

4406 ContextIdToAllocationNode[Id] = Node->getOrigNode();

4407 for (auto *Clone : Node->Clones) {

4408 for (auto Id : Clone->getContextIds())

4409 ContextIdToAllocationNode[Id] = Clone->getOrigNode();

4410 }

4411 }

4412

4413

4414

4415

4416 DenseSet<const ContextNode *> Visited;

4417 for (auto &Entry : AllocationCallToContextNodeMap) {

4419

4420 mergeClones(Node, Visited, ContextIdToAllocationNode);

4421

4422

4423

4424

4425

4426 auto Clones = Node->Clones;

4427 for (auto *Clone : Clones)

4428 mergeClones(Clone, Visited, ContextIdToAllocationNode);

4429 }

4430

4432 dbgs() << "CCG after merging:\n";

4433 dbgs() << *this;

4434 }

4436 exportToDot("aftermerge");

4437

4439 check();

4440 }

4441}

4442

4443

4444template <typename DerivedCCG, typename FuncTy, typename CallTy>

4445void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(

4446 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

4447 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {

4450 return;

4451

4452

4453

4454

4455

4456

4457 bool FoundUnvisited = true;

4458 unsigned Iters = 0;

4459 while (FoundUnvisited) {

4460 Iters++;

4461 FoundUnvisited = false;

4462

4463

4464 auto CallerEdges = Node->CallerEdges;

4465 for (auto CallerEdge : CallerEdges) {

4466

4467 if (CallerEdge->Callee != Node)

4468 continue;

4469

4470

4472 FoundUnvisited = true;

4473 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);

4474 }

4475 }

4476

4477 TotalMergeInvokes++;

4478 TotalMergeIters += Iters;

4479 if (Iters > MaxMergeIters)

4480 MaxMergeIters = Iters;

4481

4482

4483 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);

4484}

4485

4486template <typename DerivedCCG, typename FuncTy, typename CallTy>

4487void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(

4488 ContextNode *Node, DenseSet<const ContextNode *> &Visited,

4489 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {

4490

4491 if (Node->emptyContextIds())

4492 return;

4493

4494

4495

4496 MapVector<ContextNode *, std::vector<std::shared_ptr>>

4497 OrigNodeToCloneEdges;

4498 for (const auto &E : Node->CalleeEdges) {

4499 auto *Callee = E->Callee;

4500 if (Callee->CloneOf && Callee->Clones.empty())

4501 continue;

4502 ContextNode *Base = Callee->getOrigNode();

4503 OrigNodeToCloneEdges[Base].push_back(E);

4504 }

4505

4506

4507

4508

4509 auto CalleeCallerEdgeLessThan = [](const std::shared_ptr &A,

4510 const std::shared_ptr &B) {

4511 if (A->Callee->CallerEdges.size() != B->Callee->CallerEdges.size())

4512 return A->Callee->CallerEdges.size() < B->Callee->CallerEdges.size();

4513 if (A->Callee->CloneOf && B->Callee->CloneOf)

4514 return true;

4515 else if (A->Callee->CloneOf && B->Callee->CloneOf)

4516 return false;

4517

4518

4519 return *A->ContextIds.begin() < *B->ContextIds.begin();

4520 };

4521

4522

4523

4524 for (auto Entry : OrigNodeToCloneEdges) {

4525

4526

4527 auto &CalleeEdges = Entry.second;

4528 auto NumCalleeClones = CalleeEdges.size();

4529

4530 if (NumCalleeClones == 1)

4531 continue;

4532

4533

4534

4535

4537

4538

4539

4540

4541 DenseSet<ContextNode *> OtherCallersToShareMerge;

4542 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,

4543 OtherCallersToShareMerge);

4544

4545

4546

4547

4548 ContextNode *MergeNode = nullptr;

4549 DenseMap<ContextNode *, unsigned> CallerToMoveCount;

4550 for (auto CalleeEdge : CalleeEdges) {

4551 auto *OrigCallee = CalleeEdge->Callee;

4552

4553

4554

4555 if (!MergeNode) {

4556

4557 if (CalleeEdge->Callee->CallerEdges.size() == 1) {

4558 MergeNode = OrigCallee;

4559 NonNewMergedNodes++;

4560 continue;

4561 }

4562

4563

4564

4565

4566 if (!OtherCallersToShareMerge.empty()) {

4567 bool MoveAllCallerEdges = true;

4568 for (auto CalleeCallerE : OrigCallee->CallerEdges) {

4569 if (CalleeCallerE == CalleeEdge)

4570 continue;

4571 if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) {

4572 MoveAllCallerEdges = false;

4573 break;

4574 }

4575 }

4576

4577

4578 if (MoveAllCallerEdges) {

4579 MergeNode = OrigCallee;

4580 NonNewMergedNodes++;

4581 continue;

4582 }

4583 }

4584 }

4585

4586 if (MergeNode) {

4587 assert(MergeNode != OrigCallee);

4588 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,

4589 false);

4590 } else {

4591 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);

4592 NewMergedNodes++;

4593 }

4594

4595

4596 if (!OtherCallersToShareMerge.empty()) {

4597

4598

4599

4600 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;

4601 for (auto &CalleeCallerE : OrigCalleeCallerEdges) {

4602 if (CalleeCallerE == CalleeEdge)

4603 continue;

4604 if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller))

4605 continue;

4606 CallerToMoveCount[CalleeCallerE->Caller]++;

4607 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,

4608 false);

4609 }

4610 }

4611 removeNoneTypeCalleeEdges(OrigCallee);

4612 removeNoneTypeCalleeEdges(MergeNode);

4613 }

4614 }

4615}

4616

4617

4618

4619

4620

4621

4622

4623

4624

4625

4626

4627

4628

4629

4630template <typename DerivedCCG, typename FuncTy, typename CallTy>

4631void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::

4632 findOtherCallersToShareMerge(

4633 ContextNode *Node,

4634 std::vector<std::shared_ptr> &CalleeEdges,

4635 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,

4636 DenseSet<ContextNode *> &OtherCallersToShareMerge) {

4637 auto NumCalleeClones = CalleeEdges.size();

4638

4639

4640 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;

4641

4642

4643 unsigned PossibleOtherCallerNodes = 0;

4644

4645

4646

4647 if (CalleeEdges[0]->Callee->CallerEdges.size() < 2)

4648 return;

4649

4650

4651

4652

4653 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;

4654 for (auto CalleeEdge : CalleeEdges) {

4655 assert(CalleeEdge->Callee->CallerEdges.size() > 1);

4656

4657

4658 for (auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {

4659 if (CalleeCallerEdges->Caller == Node) {

4660 assert(CalleeCallerEdges == CalleeEdge);

4661 continue;

4662 }

4663 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;

4664

4665

4666 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==

4667 NumCalleeClones)

4668 PossibleOtherCallerNodes++;

4669 }

4670

4671

4672 for (auto Id : CalleeEdge->getContextIds()) {

4673 auto *Alloc = ContextIdToAllocationNode.lookup(Id);

4675

4676

4677 MissingAllocForContextId++;

4678 continue;

4679 }

4680 CalleeEdgeToAllocNodes[CalleeEdge.get()].insert(Alloc);

4681 }

4682 }

4683

4684

4685

4686

4687 for (auto CalleeEdge : CalleeEdges) {

4688 assert(CalleeEdge->Callee->CallerEdges.size() > 1);

4689

4690 if (!PossibleOtherCallerNodes)

4691 break;

4692 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];

4693

4694 for (auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {

4695

4696 if (CalleeCallerE == CalleeEdge)

4697 continue;

4698

4699

4700 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=

4701 NumCalleeClones)

4702 continue;

4703

4704

4705 for (auto Id : CalleeCallerE->getContextIds()) {

4706 auto *Alloc = ContextIdToAllocationNode.lookup(Id);

4708 continue;

4709

4710

4711 if (!CurCalleeAllocNodes.contains(Alloc)) {

4712 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;

4713 PossibleOtherCallerNodes--;

4714 break;

4715 }

4716 }

4717 }

4718 }

4719

4720 if (!PossibleOtherCallerNodes)

4721 return;

4722

4723

4724

4725 for (auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) {

4726 if (Count != NumCalleeClones)

4727 continue;

4728 OtherCallersToShareMerge.insert(OtherCaller);

4729 }

4730}

4731

4732

4733

4734

4735

4736

4737

4738

4739

4740

4741

4742

4743

4744

4745

4746

4747

4748

4749

4750

4751

4752

4753

4754

4755

4756

4757

4758

4759

4760

4761

4762

4763

4764

4765

4766

4767

4768

4769

4770

4771

4772

4773template <typename DerivedCCG, typename FuncTy, typename CallTy>

4774bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {

4776

4777 mergeClones();

4778

4779

4780

4781 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;

4782

4783

4784

4785 auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,

4786 const FuncInfo &CalleeFunc) {

4788 CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;

4789 };

4790

4791

4792 struct FuncCloneInfo {

4793

4794 FuncInfo FuncClone;

4795

4796

4797 DenseMap<CallInfo, CallInfo> CallMap;

4798 };

4799

4800

4801

4802

4803

4804

4805

4806

4807

4808

4809

4810

4811

4812

4813

4814

4815

4816

4817

4818

4819

4820

4821

4822

4823

4824

4825 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>

4826 UnassignedCallClones;

4827

4828

4829

4830 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {

4831 FuncInfo OrigFunc(Func);

4832

4833

4834

4835

4836 std::vector FuncCloneInfos;

4837 for (auto &Call : CallsWithMetadata) {

4838 ContextNode *Node = getNodeForInst(Call);

4839

4840

4841

4842 if (!Node || Node->Clones.empty())

4843 continue;

4845 "Not having a call should have prevented cloning");

4846

4847

4848

4849 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;

4850

4851

4852

4853 auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,

4854 CallInfo &Call,

4855 ContextNode *CallsiteClone,

4856 bool IsAlloc) {

4857

4858 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;

4859

4860 assert(FuncCloneInfos.size() > FuncClone.cloneNo());

4861 DenseMap<CallInfo, CallInfo> &CallMap =

4862 FuncCloneInfos[FuncClone.cloneNo()].CallMap;

4863 CallInfo CallClone(Call);

4864 if (auto It = CallMap.find(Call); It != CallMap.end())

4865 CallClone = It->second;

4866 CallsiteClone->setCall(CallClone);

4867

4868 for (auto &MatchingCall : Node->MatchingCalls) {

4869 CallInfo CallClone(MatchingCall);

4870 if (auto It = CallMap.find(MatchingCall); It != CallMap.end())

4871 CallClone = It->second;

4872

4873 MatchingCall = CallClone;

4874 }

4875 };

4876

4877

4878

4879

4880

4881 auto MoveEdgeToNewCalleeCloneAndSetUp =

4882 [&](const std::shared_ptr &Edge) {

4883 ContextNode *OrigCallee = Edge->Callee;

4884 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);

4885 removeNoneTypeCalleeEdges(NewClone);

4886 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);

4887

4888

4889

4890 if (CallsiteToCalleeFuncCloneMap.count(OrigCallee))

4891 RecordCalleeFuncOfCallsite(

4892 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);

4893 return NewClone;

4894 };

4895

4896

4897

4898

4899 std::deque<ContextNode *> ClonesWorklist;

4900

4901 if (Node->emptyContextIds())

4902 ClonesWorklist.push_back(Node);

4904

4905

4906

4907

4908 unsigned NodeCloneCount = 0;

4909 while (!ClonesWorklist.empty()) {

4910 ContextNode *Clone = ClonesWorklist.front();

4911 ClonesWorklist.pop_front();

4912 NodeCloneCount++;

4915

4916

4917

4918

4919

4920 if (FuncCloneInfos.size() < NodeCloneCount) {

4921

4922 if (NodeCloneCount == 1) {

4923

4924

4925

4927 Clone->CallerEdges, [&](const std::shared_ptr &E) {

4928 return CallsiteToCalleeFuncCloneMap.count(E->Caller);

4929 }));

4930

4931

4932 FuncCloneInfos.push_back(

4933 {OrigFunc, DenseMap<CallInfo, CallInfo>()});

4934 AssignCallsiteCloneToFuncClone(

4935 OrigFunc, Call, Clone,

4936 AllocationCallToContextNodeMap.count(Call));

4937 for (auto &CE : Clone->CallerEdges) {

4938

4939 if (CE->Caller->hasCall())

4940 continue;

4941 RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);

4942 }

4943 continue;

4944 }

4945

4946

4947

4948

4949

4950

4951 FuncInfo PreviousAssignedFuncClone;

4953 Clone->CallerEdges, [&](const std::shared_ptr &E) {

4954 return CallsiteToCalleeFuncCloneMap.count(E->Caller);

4955 });

4956 bool CallerAssignedToCloneOfFunc = false;

4957 if (EI != Clone->CallerEdges.end()) {

4958 const std::shared_ptr &Edge = *EI;

4959 PreviousAssignedFuncClone =

4960 CallsiteToCalleeFuncCloneMap[Edge->Caller];

4961 CallerAssignedToCloneOfFunc = true;

4962 }

4963

4964

4965

4966 DenseMap<CallInfo, CallInfo> NewCallMap;

4967 unsigned CloneNo = FuncCloneInfos.size();

4968 assert(CloneNo > 0 && "Clone 0 is the original function, which "

4969 "should already exist in the map");

4970 FuncInfo NewFuncClone = cloneFunctionForCallsite(

4971 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);

4972 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});

4973 FunctionClonesAnalysis++;

4975

4976

4977

4978

4979 if (!CallerAssignedToCloneOfFunc) {

4980 AssignCallsiteCloneToFuncClone(

4981 NewFuncClone, Call, Clone,

4982 AllocationCallToContextNodeMap.count(Call));

4983 for (auto &CE : Clone->CallerEdges) {

4984

4985 if (CE->Caller->hasCall())

4986 continue;

4987 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);

4988 }

4989 continue;

4990 }

4991

4992

4993

4994

4995

4996

4997

4998

4999 auto CallerEdges = Clone->CallerEdges;

5000 for (auto CE : CallerEdges) {

5001

5002 if (CE->isRemoved()) {

5004 continue;

5005 }

5007

5008 if (CE->Caller->hasCall())

5009 continue;

5010

5011 if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||

5012

5013

5014

5015 CallsiteToCalleeFuncCloneMap[CE->Caller] !=

5016 PreviousAssignedFuncClone)

5017 continue;

5018

5019 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);

5020

5021

5022

5023

5024

5025

5026

5027

5028

5029

5030

5031

5032 auto CalleeEdges = CE->Caller->CalleeEdges;

5033 for (auto CalleeEdge : CalleeEdges) {

5034

5035

5036 if (CalleeEdge->isRemoved()) {

5038 continue;

5039 }

5041 ContextNode *Callee = CalleeEdge->Callee;

5042

5043

5044

5045 if (Callee == Clone || Callee->hasCall())

5046 continue;

5047

5048

5049

5050 if (Callee == CalleeEdge->Caller)

5051 continue;

5052 ContextNode *NewClone =

5053 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);

5054

5055

5056 removeNoneTypeCalleeEdges(Callee);

5057

5058

5059

5060

5061

5062

5063

5064 CallInfo OrigCall(Callee->getOrigNode()->Call);

5065 OrigCall.setCloneNo(0);

5066 DenseMap<CallInfo, CallInfo> &CallMap =

5067 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;

5069 CallInfo NewCall(CallMap[OrigCall]);

5071 NewClone->setCall(NewCall);

5072

5073 for (auto &MatchingCall : NewClone->MatchingCalls) {

5074 CallInfo OrigMatchingCall(MatchingCall);

5075 OrigMatchingCall.setCloneNo(0);

5076 assert(CallMap.count(OrigMatchingCall));

5077 CallInfo NewCall(CallMap[OrigMatchingCall]);

5079

5080 MatchingCall = NewCall;

5081 }

5082 }

5083 }

5084

5085

5086

5087 }

5088

5089 auto FindFirstAvailFuncClone = [&]() {

5090

5091

5092

5093

5094 for (auto &CF : FuncCloneInfos) {

5095 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))

5096 return CF.FuncClone;

5097 }

5099 "Expected an available func clone for this callsite clone");

5100 };

5101

5102

5103

5104

5105

5106

5107

5108

5109

5110

5111

5112

5113

5114

5115

5116 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;

5117 FuncInfo FuncCloneAssignedToCurCallsiteClone;

5118

5119

5120

5121 auto CloneCallerEdges = Clone->CallerEdges;

5122 for (auto &Edge : CloneCallerEdges) {

5123

5124

5125

5126 if (Edge->isRemoved())

5127 continue;

5128

5129 if (Edge->Caller->hasCall())

5130 continue;

5131

5132

5133 if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {

5134 FuncInfo FuncCloneCalledByCaller =

5135 CallsiteToCalleeFuncCloneMap[Edge->Caller];

5136

5137

5138

5139

5140

5141

5142

5143

5144

5145 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&

5146 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=

5147 Clone) ||

5148

5149

5150

5151

5152

5153

5154 (FuncCloneAssignedToCurCallsiteClone &&

5155 FuncCloneAssignedToCurCallsiteClone !=

5156 FuncCloneCalledByCaller)) {

5157

5158

5159

5160

5161

5162

5163

5164

5165

5166

5167

5168

5169

5170

5171 if (FuncCloneToNewCallsiteCloneMap.count(

5172 FuncCloneCalledByCaller)) {

5173 ContextNode *NewClone =

5174 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];

5175 moveEdgeToExistingCalleeClone(Edge, NewClone);

5176

5177 removeNoneTypeCalleeEdges(NewClone);

5178 } else {

5179

5180 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(Edge);

5181 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =

5182 NewClone;

5183

5184 ClonesWorklist.push_back(NewClone);

5185 }

5186

5187

5188 removeNoneTypeCalleeEdges(Clone);

5189

5190

5191 continue;

5192 }

5193

5194

5195

5196 if (!FuncCloneAssignedToCurCallsiteClone) {

5197 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;

5198

5199 AssignCallsiteCloneToFuncClone(

5200 FuncCloneCalledByCaller, Call, Clone,

5201 AllocationCallToContextNodeMap.count(Call));

5202 } else

5203

5204

5205 assert(FuncCloneAssignedToCurCallsiteClone ==

5206 FuncCloneCalledByCaller);

5207

5208 } else {

5209

5210

5211

5212

5213

5214

5215 if (!FuncCloneAssignedToCurCallsiteClone) {

5216 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();

5217 assert(FuncCloneAssignedToCurCallsiteClone);

5218

5219 AssignCallsiteCloneToFuncClone(

5220 FuncCloneAssignedToCurCallsiteClone, Call, Clone,

5221 AllocationCallToContextNodeMap.count(Call));

5222 } else

5223 assert(FuncCloneToCurNodeCloneMap

5224 [FuncCloneAssignedToCurCallsiteClone] == Clone);

5225

5226 RecordCalleeFuncOfCallsite(Edge->Caller,

5227 FuncCloneAssignedToCurCallsiteClone);

5228 }

5229 }

5230

5231

5232

5233

5234

5235

5236

5237

5238

5239

5240

5241

5242

5243

5244

5245

5246

5247 if (!FuncCloneAssignedToCurCallsiteClone) {

5248 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();

5249 assert(FuncCloneAssignedToCurCallsiteClone &&

5250 "No available func clone for this callsite clone");

5251 AssignCallsiteCloneToFuncClone(

5252 FuncCloneAssignedToCurCallsiteClone, Call, Clone,

5253 AllocationCallToContextNodeMap.contains(Call));

5254 }

5255 }

5258 for (const auto &PE : Node->CalleeEdges)

5260 for (const auto &CE : Node->CallerEdges)

5262 for (auto *Clone : Node->Clones) {

5264 for (const auto &PE : Clone->CalleeEdges)

5266 for (const auto &CE : Clone->CallerEdges)

5268 }

5269 }

5270 }

5271

5272 if (FuncCloneInfos.size() < 2)

5273 continue;

5274

5275

5276

5277

5278 for (auto &Call : CallsWithMetadata) {

5279 ContextNode *Node = getNodeForInst(Call);

5280 if (!Node || Node->hasCall() || Node->emptyContextIds())

5281 continue;

5282

5283

5284

5285

5286 if (Node->Clones.size() + 1 >= FuncCloneInfos.size())

5287 continue;

5288

5289

5290 DenseSet NodeCallClones;

5291 for (auto *C : Node->Clones)

5292 NodeCallClones.insert(C->Call.cloneNo());

5293 unsigned I = 0;

5294

5295 for (auto &FC : FuncCloneInfos) {

5296

5297 assert(FC.FuncClone.cloneNo() == I);

5298

5299

5300 if (++I == 1 || NodeCallClones.contains(I)) {

5301 continue;

5302 }

5303

5304

5305 auto &CallVector = UnassignedCallClones[Node][I];

5306 DenseMap<CallInfo, CallInfo> &CallMap = FC.CallMap;

5307 if (auto It = CallMap.find(Call); It != CallMap.end()) {

5308 CallInfo CallClone = It->second;

5309 CallVector.push_back(CallClone);

5310 } else {

5311

5312

5313 assert(false && "Expected to find call in CallMap");

5314 }

5315

5316 for (auto &MatchingCall : Node->MatchingCalls) {

5317 if (auto It = CallMap.find(MatchingCall); It != CallMap.end()) {

5318 CallInfo CallClone = It->second;

5319 CallVector.push_back(CallClone);

5320 } else {

5321

5322

5323 assert(false && "Expected to find call in CallMap");

5324 }

5325 }

5326 }

5327 }

5328 }

5329

5330 uint8_t BothTypes =

5331 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

5332

5333 auto UpdateCalls = [&](ContextNode *Node,

5334 DenseSet<const ContextNode *> &Visited,

5335 auto &&UpdateCalls) {

5336 auto Inserted = Visited.insert(Node);

5338 return;

5339

5340 for (auto *Clone : Node->Clones)

5341 UpdateCalls(Clone, Visited, UpdateCalls);

5342

5343 for (auto &Edge : Node->CallerEdges)

5344 UpdateCalls(Edge->Caller, Visited, UpdateCalls);

5345

5346

5347

5348 if (Node->hasCall() || Node->emptyContextIds())

5349 return;

5350

5351 if (Node->IsAllocation) {

5352 auto AT = allocTypeToUse(Node->AllocTypes);

5353

5354

5355

5356

5358 !ContextIdToContextSizeInfos.empty()) {

5359 uint64_t TotalCold = 0;

5360 uint64_t Total = 0;

5361 for (auto Id : Node->getContextIds()) {

5362 auto TypeI = ContextIdToAllocationType.find(Id);

5363 assert(TypeI != ContextIdToAllocationType.end());

5364 auto CSI = ContextIdToContextSizeInfos.find(Id);

5365 if (CSI != ContextIdToContextSizeInfos.end()) {

5366 for (auto &Info : CSI->second) {

5368 if (TypeI->second == AllocationType::Cold)

5369 TotalCold += Info.TotalSize;

5370 }

5371 }

5372 }

5374 AT = AllocationType::Cold;

5375 }

5376 updateAllocationCall(Node->Call, AT);

5377 assert(Node->MatchingCalls.empty());

5378 return;

5379 }

5380

5381 if (!CallsiteToCalleeFuncCloneMap.count(Node))

5382 return;

5383

5384 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];

5385 updateCall(Node->Call, CalleeFunc);

5386

5387 for (auto &Call : Node->MatchingCalls)

5388 updateCall(Call, CalleeFunc);

5389

5390

5391

5392 if (!UnassignedCallClones.contains(Node))

5393 return;

5394 DenseSet NodeCallClones;

5395 for (auto *C : Node->Clones)

5396 NodeCallClones.insert(C->Call.cloneNo());

5397

5398 auto &ClonedCalls = UnassignedCallClones[Node];

5399 for (auto &[CloneNo, CallVector] : ClonedCalls) {

5400

5401 assert(CloneNo > 0);

5402

5403 if (NodeCallClones.contains(CloneNo))

5404 continue;

5405

5406 for (auto &Call : CallVector)

5407 updateCall(Call, CalleeFunc);

5408 }

5409 };

5410

5411

5412

5413

5414

5415

5416 DenseSet<const ContextNode *> Visited;

5417 for (auto &Entry : AllocationCallToContextNodeMap)

5418 UpdateCalls(Entry.second, Visited, UpdateCalls);

5419

5421}

5422

5423

5424

5426 SHA1 Hasher;

5427

5428

5429 for (auto &SN : FS->callsites()) {

5430

5431

5432

5434 SN.Clones.size() > I &&

5435 "Callsite summary has fewer entries than other summaries in function");

5436 if (SN.Clones.size() <= I || !SN.Clones[I])

5437 continue;

5441 }

5442

5443 for (auto &AN : FS->allocs()) {

5444

5445

5446

5447 assert(AN.Versions.size() > I &&

5448 "Alloc summary has fewer entries than other summaries in function");

5449 if (AN.Versions.size() <= I ||

5451 continue;

5453 }

5455}

5456

5460 &FuncToAliasMap,

5463

5464

5466 NewGV->takeName(DeclGV);

5469 };

5470

5471

5472

5473 auto CloneFuncAliases = [&](Function *NewF, unsigned I) {

5474 if (!FuncToAliasMap.count(&F))

5475 return;

5476 for (auto *A : FuncToAliasMap[&F]) {

5478 auto *PrevA = M.getNamedAlias(AliasName);

5480 A->getType()->getPointerAddressSpace(),

5481 A->getLinkage(), AliasName, NewF);

5482 NewA->copyAttributesFrom(A);

5483 if (PrevA)

5484 TakeDeclNameAndReplace(PrevA, NewA);

5485 }

5486 };

5487

5488

5489

5490 assert(NumClones > 1);

5492 VMaps.reserve(NumClones - 1);

5493 FunctionsClonedThinBackend++;

5494

5495

5496

5497

5498

5499

5500

5501

5502

5503

5504

5506

5507

5509

5510 for (unsigned I = 1; I < NumClones; I++) {

5511 VMaps.emplace_back(std::make_unique());

5514

5515

5516

5517 if (HashToFunc.contains(Hash)) {

5518 FunctionCloneDuplicatesThinBackend++;

5519 auto *Func = HashToFunc[Hash];

5520 if (Func->hasAvailableExternallyLinkage()) {

5521

5522

5523

5524

5525

5526 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());

5528 << "created clone decl " << ore::NV("Decl", Decl.getCallee()));

5529 continue;

5530 }

5531 auto *PrevF = M.getFunction(Name);

5533 if (PrevF)

5534 TakeDeclNameAndReplace(PrevF, Alias);

5536 << "created clone alias " << ore::NV("Alias", Alias));

5537

5538

5539 CloneFuncAliases(Func, I);

5540 continue;

5541 }

5543 HashToFunc[Hash] = NewF;

5544 FunctionClonesThinBackend++;

5545

5546

5547 for (auto &BB : *NewF) {

5548 for (auto &Inst : BB) {

5549 Inst.setMetadata(LLVMContext::MD_memprof, nullptr);

5550 Inst.setMetadata(LLVMContext::MD_callsite, nullptr);

5551 }

5552 }

5554 if (PrevF)

5555 TakeDeclNameAndReplace(PrevF, NewF);

5556 else

5557 NewF->setName(Name);

5560 << "created clone " << ore::NV("NewFunction", NewF));

5561

5562

5563 CloneFuncAliases(NewF, I);

5564 }

5565 return VMaps;

5566}

5567

5568

5569

5572 const Function *CallingFunc = nullptr) {

5573

5574

5575

5577 if (!TheFnVI)

5578

5579

5580

5583 if (TheFnVI)

5584 return TheFnVI;

5585

5588

5589

5590

5591 auto SrcFileMD = F.getMetadata("thinlto_src_file");

5592

5593

5594

5595

5596

5597 if (!SrcFileMD && F.isDeclaration()) {

5598

5599

5600 assert(CallingFunc);

5601 SrcFileMD = CallingFunc->getMetadata("thinlto_src_file");

5602

5603

5604

5605

5606 assert(SrcFileMD || OrigName == F.getName());

5607 }

5608 StringRef SrcFile = M.getSourceFileName();

5609 if (SrcFileMD)

5610 SrcFile = dyn_cast(SrcFileMD->getOperand(0))->getString();

5615

5616

5617

5618

5619

5620 if (!TheFnVI && OrigName == F.getName() && F.hasLocalLinkage() &&

5621 F.getName().contains('.')) {

5622 OrigName = F.getName().rsplit('.').first;

5627 }

5628

5629

5630

5631 assert(TheFnVI || F.isDeclaration());

5632 return TheFnVI;

5633}

5634

5635bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(

5637 ICallAnalysis = std::make_unique();

5638 Symtab = std::make_unique();

5639

5640

5641

5642

5643

5644

5645

5646

5647

5648

5649 if (Error E = Symtab->create(M, true, false)) {

5650 std::string SymtabFailure = toString(std::move(E));

5651 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);

5652 return false;

5653 }

5654 return true;

5655}

5656

5657#ifndef NDEBUG

5658

5659

5664 auto MIBIter = AllocNode.MIBs.begin();

5665 for (auto &MDOp : MemProfMD->operands()) {

5666 assert(MIBIter != AllocNode.MIBs.end());

5667 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();

5670 assert(StackMDNode);

5672 auto ContextIterBegin =

5674

5675 uint64_t LastStackContextId =

5676 (ContextIterBegin != StackContext.end() && *ContextIterBegin == 0) ? 1

5677 : 0;

5678 for (auto ContextIter = ContextIterBegin; ContextIter != StackContext.end();

5679 ++ContextIter) {

5680

5681

5682

5683 if (LastStackContextId == *ContextIter)

5684 continue;

5685 LastStackContextId = *ContextIter;

5686 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());

5688 *ContextIter);

5689 StackIdIndexIter++;

5690 }

5691 MIBIter++;

5692 }

5693}

5694#endif

5695

5696bool MemProfContextDisambiguation::applyImport(Module &M) {

5697 assert(ImportSummary);

5699

5700

5701

5702

5703 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>

5704 FuncToAliasMap;

5705 for (auto &A : M.aliases()) {

5706 auto *Aliasee = A.getAliaseeObject();

5708 FuncToAliasMap[F].insert(&A);

5709 }

5710

5711 if (!initializeIndirectCallPromotionInfo(M))

5712 return false;

5713

5714 for (auto &F : M) {

5716 continue;

5717

5718 OptimizationRemarkEmitter ORE(&F);

5719

5721 bool ClonesCreated = false;

5722 unsigned NumClonesCreated = 0;

5723 auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {

5724

5725 assert(NumClones > 0);

5726

5727 if (NumClones == 1)

5728 return;

5729

5730

5731

5732

5733 if (ClonesCreated) {

5734 assert(NumClonesCreated == NumClones);

5735 return;

5736 }

5738

5739 assert(VMaps.size() == NumClones - 1);

5741 ClonesCreated = true;

5742 NumClonesCreated = NumClones;

5743 };

5744

5745 auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,

5746 Function *CalledFunction, FunctionSummary *FS) {

5747

5748 CloneFuncIfNeeded(StackNode.Clones.size(), FS);

5749

5751

5752

5753

5754

5755

5756

5757

5758

5760 if (CalledFunction != CB->getCalledOperand() &&

5761 (!GA || CalledFunction != GA->getAliaseeObject())) {

5762 SkippedCallsCloning++;

5763 return;

5764 }

5765

5766

5767

5768 auto CalleeOrigName = CalledFunction->getName();

5769 for (unsigned J = 0; J < StackNode.Clones.size(); J++) {

5770

5771

5772 if (J > 0 && VMaps[J - 1]->empty())

5773 continue;

5774

5775

5776 if (!StackNode.Clones[J])

5777 continue;

5778 auto NewF = M.getOrInsertFunction(

5780 CalledFunction->getFunctionType());

5781 CallBase *CBClone;

5782

5783 if (!J)

5784 CBClone = CB;

5785 else

5787

5788

5789

5790

5791

5792

5794 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)

5795 << ore::NV("Call", CBClone) << " in clone "

5797 << " assigned to call function clone "

5798 << ore::NV("Callee", NewF.getCallee()));

5799 }

5800 };

5801

5802

5804

5805

5806

5807

5808 if (!TheFnVI)

5809 continue;

5810

5811 auto *GVSummary =

5812 ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());

5813 if (!GVSummary) {

5814

5815

5816 auto SrcModuleMD = F.getMetadata("thinlto_src_module");

5817 assert(SrcModuleMD &&

5818 "enable-import-metadata is needed to emit thinlto_src_module");

5819 StringRef SrcModule =

5822 if (GVS->modulePath() == SrcModule) {

5823 GVSummary = GVS.get();

5824 break;

5825 }

5826 }

5827 assert(GVSummary && GVSummary->modulePath() == SrcModule);

5828 }

5829

5830

5831

5833 continue;

5834

5836

5837 if (FS->allocs().empty() && FS->callsites().empty())

5838 continue;

5839

5840 auto SI = FS->callsites().begin();

5841 auto AI = FS->allocs().begin();

5842

5843

5844

5845

5846 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;

5847

5848

5849 for (auto CallsiteIt = FS->callsites().rbegin();

5850 CallsiteIt != FS->callsites().rend(); CallsiteIt++) {

5851 auto &Callsite = *CallsiteIt;

5852

5853

5854

5855 if (!Callsite.StackIdIndices.empty())

5856 break;

5857 MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});

5858 }

5859

5860

5862

5863

5864

5865

5866 for (auto &BB : F) {

5867 for (auto &I : BB) {

5869

5871 continue;

5872

5873 auto *CalledValue = CB->getCalledOperand();

5874 auto *CalledFunction = CB->getCalledFunction();

5875 if (CalledValue && !CalledFunction) {

5876 CalledValue = CalledValue->stripPointerCasts();

5877

5879 }

5880

5881

5883 assert(!CalledFunction &&

5884 "Expected null called function in callsite for alias");

5886 }

5887

5888 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(

5889 I.getMetadata(LLVMContext::MD_callsite));

5890 auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);

5891

5892

5893

5894

5895

5896 if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) {

5897 CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"

5898 ? AllocTypeColdThinBackend++

5899 : AllocTypeNotColdThinBackend++;

5900 OrigAllocsThinBackend++;

5901 AllocVersionsThinBackend++;

5902 if (!MaxAllocVersionsThinBackend)

5903 MaxAllocVersionsThinBackend = 1;

5904 continue;

5905 }

5906

5907 if (MemProfMD) {

5908

5909 assert(AI != FS->allocs().end());

5910 auto &AllocNode = *(AI++);

5911

5912#ifndef NDEBUG

5914 ImportSummary);

5915#endif

5916

5917

5918 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);

5919

5920 OrigAllocsThinBackend++;

5921 AllocVersionsThinBackend += AllocNode.Versions.size();

5922 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())

5923 MaxAllocVersionsThinBackend = AllocNode.Versions.size();

5924

5925

5926

5927

5928

5929

5930

5931

5932

5933 if (AllocNode.Versions.size() == 1 &&

5934 (AllocationType)AllocNode.Versions[0] != AllocationType::Cold) {

5936 AllocationType::NotCold ||

5938 AllocationType::None);

5939 UnclonableAllocsThinBackend++;

5940 continue;

5941 }

5942

5943

5945 return Type == ((uint8_t)AllocationType::NotCold |

5946 (uint8_t)AllocationType::Cold);

5947 }));

5948

5949

5950 for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {

5951

5952

5953 if (J > 0 && VMaps[J - 1]->empty())

5954 continue;

5955

5956 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)

5957 continue;

5959 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++

5960 : AllocTypeNotColdThinBackend++;

5963 AllocTypeString);

5964 CallBase *CBClone;

5965

5966 if (!J)

5967 CBClone = CB;

5968 else

5969

5970

5971

5975 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)

5976 << ore::NV("AllocationCall", CBClone) << " in clone "

5978 << " marked with memprof allocation attribute "

5979 << ore::NV("Attribute", AllocTypeString));

5980 }

5981 } else if (!CallsiteContext.empty()) {

5982 if (!CalledFunction) {

5983#ifndef NDEBUG

5984

5986 assert(!CI || !CI->isInlineAsm());

5987#endif

5988

5990

5991

5992

5993

5994

5995 auto NumClones =

5996 recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);

5997

5998

5999

6000

6001 if (NumClones)

6002 CloneFuncIfNeeded(NumClones, FS);

6003 }

6004

6005 else {

6006

6007 assert(SI != FS->callsites().end());

6008 auto &StackNode = *(SI++);

6009

6010#ifndef NDEBUG

6011

6012

6014 for (auto StackId : CallsiteContext) {

6016 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==

6017 StackId);

6018 StackIdIndexIter++;

6019 }

6020#endif

6021

6022 CloneCallsite(StackNode, CB, CalledFunction, FS);

6023 }

6024 } else if (CB->isTailCall() && CalledFunction) {

6025

6026

6027 ValueInfo CalleeVI =

6029 if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {

6030 auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);

6031 assert(Callsite != MapTailCallCalleeVIToCallsite.end());

6032 CloneCallsite(Callsite->second, CB, CalledFunction, FS);

6033 }

6034 }

6035 }

6036 }

6037

6038

6039 performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);

6040 }

6041

6042

6043

6044 for (auto &F : M) {

6045

6046

6048 continue;

6049 for (auto &BB : F) {

6050 for (auto &I : BB) {

6052 continue;

6053 I.setMetadata(LLVMContext::MD_memprof, nullptr);

6054 I.setMetadata(LLVMContext::MD_callsite, nullptr);

6055 }

6056 }

6057 }

6058

6060}

6061

6062unsigned MemProfContextDisambiguation::recordICPInfo(

6066

6067 uint32_t NumCandidates;

6068 uint64_t TotalCount;

6069 auto CandidateProfileData =

6070 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,

6071 NumCandidates);

6072 if (CandidateProfileData.empty())

6073 return 0;

6074

6075

6076

6077

6078 bool ICPNeeded = false;

6079 unsigned NumClones = 0;

6080 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);

6081 for (const auto &Candidate : CandidateProfileData) {

6082#ifndef NDEBUG

6083 auto CalleeValueInfo =

6084#endif

6085 ImportSummary->getValueInfo(Candidate.Value);

6086

6087

6088 assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);

6089 assert(SI != AllCallsites.end());

6090 auto &StackNode = *(SI++);

6091

6092

6093

6095 [](unsigned CloneNo) { return CloneNo != 0; });

6096

6097

6099 NumClones = StackNode.Clones.size();

6100 }

6101 if (!ICPNeeded)

6102 return NumClones;

6103

6104

6105 ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,

6106 TotalCount, CallsiteInfoStartIndex});

6107 return NumClones;

6108}

6109

6110void MemProfContextDisambiguation::performICP(

6112 ArrayRef<std::unique_ptr> VMaps,

6114 OptimizationRemarkEmitter &ORE) {

6115

6116

6117

6118

6119

6120

6121 for (auto &Info : ICallAnalysisInfo) {

6122 auto *CB = Info.CB;

6124 auto TotalCount = Info.TotalCount;

6125 unsigned NumPromoted = 0;

6126 unsigned NumClones = 0;

6127

6128 for (auto &Candidate : Info.CandidateProfileData) {

6129 auto &StackNode = AllCallsites[CallsiteIndex++];

6130

6131

6133 NumClones = StackNode.Clones.size();

6134

6135

6136

6137

6138

6139 Function *TargetFunction = Symtab->getFunction(Candidate.Value);

6140 if (TargetFunction == nullptr ||

6141

6142

6143

6144

6147 ORE.emit([&]() {

6148 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)

6149 << "Memprof cannot promote indirect call: target with md5sum "

6150 << ore::NV("target md5sum", Candidate.Value) << " not found";

6151 });

6152

6153

6154

6155 continue;

6156 }

6157

6158

6159 const char *Reason = nullptr;

6161 ORE.emit([&]() {

6162 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)

6163 << "Memprof cannot promote indirect call to "

6164 << ore::NV("TargetFunction", TargetFunction)

6165 << " with count of " << ore::NV("TotalCount", TotalCount)

6166 << ": " << Reason;

6167 });

6168 continue;

6169 }

6170

6172

6173

6174

6175

6176 CallBase *CBClone = CB;

6177 for (unsigned J = 0; J < NumClones; J++) {

6178

6179

6180 if (J > 0 && VMaps[J - 1]->empty())

6181 continue;

6182

6183 if (J > 0)

6185

6186

6187

6190 TotalCount, isSamplePGO, &ORE);

6191 auto *TargetToUse = TargetFunction;

6192

6193

6194 if (StackNode.Clones[J]) {

6195 TargetToUse =

6198 StackNode.Clones[J]),

6200 .getCallee());

6201 }

6202 DirectCall.setCalledFunction(TargetToUse);

6203

6204

6205

6206

6207

6208

6212 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)

6213 << ore::NV("Call", CBClone) << " in clone "

6215 << " promoted and assigned to call function clone "

6216 << ore::NV("Callee", TargetToUse));

6217 }

6218

6219

6220 TotalCount -= Candidate.Count;

6221 NumPromoted++;

6222 }

6223

6224

6225 CallBase *CBClone = CB;

6226 for (unsigned J = 0; J < NumClones; J++) {

6227

6228

6229 if (J > 0 && VMaps[J - 1]->empty())

6230 continue;

6231

6232 if (J > 0)

6234

6235 CBClone->setMetadata(LLVMContext::MD_prof, nullptr);

6236

6237

6238 if (TotalCount != 0)

6240 M, *CBClone, ArrayRef(Info.CandidateProfileData).slice(NumPromoted),

6241 TotalCount, IPVK_IndirectCallTarget, Info.NumCandidates);

6242 }

6243 }

6244}

6245

6246template <typename DerivedCCG, typename FuncTy, typename CallTy>

6247bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {

6249 dbgs() << "CCG before cloning:\n";

6250 dbgs() << *this;

6251 }

6253 exportToDot("postbuild");

6254

6256 check();

6257 }

6258

6259 identifyClones();

6260

6262 check();

6263 }

6264

6266 dbgs() << "CCG after cloning:\n";

6267 dbgs() << *this;

6268 }

6270 exportToDot("cloned");

6271

6272 bool Changed = assignFunctions();

6273

6275 dbgs() << "CCG after assigning function clones:\n";

6276 dbgs() << *this;

6277 }

6279 exportToDot("clonefuncassign");

6280

6282 printTotalSizes(errs());

6283

6285}

6286

6287bool MemProfContextDisambiguation::processModule(

6289 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {

6290

6291

6292

6293 if (ImportSummary)

6294 return applyImport(M);

6295

6296

6297

6298

6299

6300

6301

6302

6303

6305 return false;

6306

6307 ModuleCallsiteContextGraph CCG(M, OREGetter);

6308 return CCG.process();

6309}

6310

6313 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {

6314

6315

6318 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");

6322 "-memprof-dot-scope=context requires -memprof-dot-context-id");

6326 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "

6327 "-memprof-dot-context-id");

6328 if (ImportSummary) {

6329

6330

6331

6333 return;

6334 }

6336 return;

6337

6338 auto ReadSummaryFile =

6340 if (!ReadSummaryFile) {

6343 "': ");

6344 return;

6345 }

6347 if (!ImportSummaryForTestingOrErr) {

6350 "': ");

6351 return;

6352 }

6353 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);

6354 ImportSummary = ImportSummaryForTesting.get();

6355}

6356

6362 };

6363 if (!processModule(M, OREGetter))

6366}

6367

6371 isPrevailing) {

6372

6373

6374

6375

6378 return;

6379

6380 IndexCallsiteContextGraph CCG(Index, isPrevailing);

6381 CCG.process();

6382}

6383

6384

6385

6386

6387

6389

6390

6391

6392

6393

6395 for (auto &F : M) {

6396 for (auto &BB : F) {

6397 for (auto &I : BB) {

6399 if (!CI)

6400 continue;

6401 if (CI->hasFnAttr("memprof")) {

6402 CI->removeFnAttr("memprof");

6404 }

6405 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {

6406 assert(!CI->hasMetadata(LLVMContext::MD_memprof));

6407 continue;

6408 }

6409

6410

6411

6412 CI->setMetadata(LLVMContext::MD_memprof, nullptr);

6413 CI->setMetadata(LLVMContext::MD_callsite, nullptr);

6415 }

6416 }

6417 }

6421}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPU Prepare AGPR Alloc

Unify divergent function exit nodes

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Analysis containing CSE Info

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

Module.h This file contains the declarations for the Module class.

Machine Check Debug Module

This file implements a map that provides insertion order iteration.

static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))

uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)

Definition MemProfContextDisambiguation.cpp:5425

static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))

static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))

static bool isMemProfClone(const Function &F)

Definition MemProfContextDisambiguation.cpp:2298

static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))

static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))

static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))

static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)

Definition MemProfContextDisambiguation.cpp:1634

static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))

bool checkColdOrNotCold(uint8_t AllocType)

Definition MemProfContextDisambiguation.cpp:3895

static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)

Definition MemProfContextDisambiguation.cpp:5570

static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))

static std::string getAllocTypeString(uint8_t AllocTypes)

Definition MemProfContextDisambiguation.cpp:1407

static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))

bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight

Definition MemProfContextDisambiguation.cpp:3540

DotScope

Definition MemProfContextDisambiguation.cpp:130

@ Context

Definition MemProfContextDisambiguation.cpp:133

@ Alloc

Definition MemProfContextDisambiguation.cpp:132

@ All

Definition MemProfContextDisambiguation.cpp:131

static unsigned getMemProfCloneNum(const Function &F)

Definition MemProfContextDisambiguation.cpp:2305

static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)

Definition MemProfContextDisambiguation.cpp:5457

static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))

static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)

Definition MemProfContextDisambiguation.cpp:1643

static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))

static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)

Definition MemProfContextDisambiguation.cpp:2290

static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)

static const std::string MemProfCloneSuffix

Definition MemProfContextDisambiguation.cpp:2288

static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)

Definition MemProfContextDisambiguation.cpp:4305

static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))

static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))

static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))

static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)

Definition MemProfContextDisambiguation.cpp:5660

static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))

This is the interface to build a ModuleSummaryIndex for a module.

ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...

if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod

FunctionAnalysisManager FAM

std::pair< BasicBlock *, BasicBlock * > Edge

This file defines generic set operations that may be used on set's of different types,...

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

ValueInfo getAliaseeVI() const

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

bool empty() const

empty - Check if the array is empty.

static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)

Return a uniquified Attribute object.

void addFnAttr(Attribute::AttrKind Kind)

Adds the attribute to the function.

void setCalledOperand(Value *V)

Subprogram description. Uses SubclassData1.

ValueT & at(const_arg_type_t< KeyT > Val)

at - Return the entry for the specified key, or abort if no such entry exists.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

bool erase(const KeyT &Val)

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

void reserve(size_type NumEntries)

Grow the densemap so that it can contain at least NumEntries items before resizing again.

Implements a dense probed hash-table based set.

Function summary information to aid decisions and implementation of importing.

FunctionType * getFunctionType() const

Returns the FunctionType for me.

DISubprogram * getSubprogram() const

Get the attached subprogram.

const Function & getFunction() const

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)

If a parent module is specified, the alias is automatically inserted into the end of the specified mo...

Function and variable summary information to aid decisions and implementation of importing.

static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)

Return a 64-bit global unique ID constructed from the name of a global symbol.

static bool isLocalLinkage(LinkageTypes Linkage)

LLVM_ABI bool isDeclaration() const

Return true if the primary definition of this global value is outside of the current translation unit...

uint64_t GUID

Declare a type to represent a global unique identifier for a global value.

Module * getParent()

Get the module that this global value is contained inside of...

LLVM_ABI void eraseFromParent()

This method unlinks 'this' from the containing module and deletes it.

static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)

Return the modified name for a global value suitable to be used as the key for a global lookup (e....

@ InternalLinkage

Rename collisions when linking (static functions).

LLVM_ABI const Function * getFunction() const

Return the function this instruction belongs to.

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

unsigned getNumOperands() const

Return number of MDNode operands.

LLVM_ABI TempMDNode clone() const

Create a (temporary) clone of this.

static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)

Replace a temporary node with a uniqued one.

static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)

This class implements a map that also provides access to all stored values in a deterministic order.

size_type count(const KeyT &Key) const

MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)

Definition MemProfContextDisambiguation.cpp:6311

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition MemProfContextDisambiguation.cpp:6357

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition MemProfContextDisambiguation.cpp:6388

static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)

Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...

Class to hold module path string table and global value map, and encapsulate methods for operating on...

static StringRef getOriginalNameBeforePromote(StringRef Name)

Helper to obtain the unpromoted name for a global value (or the original name if not promoted).

ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const

Return a ValueInfo for the index value_type (convenient when iterating index).

uint64_t getStackIdAtIndex(unsigned Index) const

A Module instance is used to store all the information related to an LLVM module.

LLVMContext & getContext() const

Get the global data context.

A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...

A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

A class that wrap the SHA1 algorithm.

LLVM_ABI void update(ArrayRef< uint8_t > Data)

Digest more data.

LLVM_ABI std::array< uint8_t, 20 > result()

Return the current raw 160-bits SHA1 for the digested data since the last call to init().

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const

Implement operator<< on Value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

std::pair< iterator, bool > insert(const ValueT &V)

void reserve(size_t Size)

Grow the DenseSet so that it can contain at least NumEntries items before resizing again.

void insert_range(Range &&R)

void swap(DenseSetImpl &RHS)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

bool erase(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

An efficient, type-erasing, non-owning reference to a callable.

Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...

CallStackIterator beginAfterSharedPrefix(const CallStack &Other)

CallStackIterator end() const

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

@ CE

Windows NT (Windows on ARM)

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)

Extract a Value from Metadata, if any.

LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)

Returns the allocation type from an MIB metadata node.

LLVM_ABI bool metadataMayIncludeContextSizeInfo()

Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all).

LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)

True if the AllocTypes bitmask contains just a single type.

LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)

Returns the string to use in attributes with the given type.

LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)

Returns the stack node from an MIB metadata node.

LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)

Removes any existing "ambiguous" memprof attribute.

DiagnosticInfoOptimizationBase::Argument NV

LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)

NodeAddr< NodeBase * > Node

NodeAddr< FuncNode * > Func

friend class Instruction

Iterator for Instructions in a `BasicBlock.

uint64_t read64le(const void *P)

void write32le(void *P, uint32_t V)

This is an optimization pass for GlobalISel generic memory operations.

cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))

Definition MemProfContextDisambiguation.cpp:228

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})

Log all errors (if any) in E to OS.

FunctionAddr VTableAddr Value

void stable_sort(R &&Range)

cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))

LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)

Return true if the given indirect call site can be made to call Callee.

Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

void set_intersect(S1Ty &S1, const S2Ty &S2)

set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)

Returns true if the instruction could have memprof metadata, used to ensure consistency between summary and metadata.

constexpr from_range_t from_range

static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case of an optional containing the sentinel) value is accepted.

cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))

bool set_is_subset(const S1Ty &S1, const S2Ty &S2)

set_is_subset(A, B) - Return true iff A in B

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

void set_subtract(S1Ty &S1, const S2Ty &S2)

set_subtract(A, B) - Compute A := A - B

InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy

Provide the FunctionAnalysisManager to Module proxy.

raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")

bool set_intersects(const S1Ty &S1, const S2Ty &S2)

set_intersects(A, B) - Return true iff A ^ B is non empty

detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)

Returns a concatenated range across two or more ranges.

LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)

Parse the specified bitcode buffer, returning the module summary index.

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)

Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

FunctionAddr VTableAddr Count

bool set_union(S1Ty &S1, const S2Ty &S2)

set_union(A, B) - Compute A := A u B, return whether A changed.

cl::opt< bool > SupportsHotColdNew

Indicate we are linking with an allocator that supports hot/cold operator new interfaces.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)

set_intersection(A, B) - Return A ^ B

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

FunctionAddr VTableAddr uintptr_t uintptr_t Data

cl::opt< bool > EnableMemProfContextDisambiguation

Enable MemProf context disambiguation for thin link.

S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)

set_difference(A, B) - Return A - B

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

Expected< T > errorOrToExpected(ErrorOr< T > &&EO)

Convert an ErrorOr to an Expected.

ArrayRef(const T &OneElt) -> ArrayRef< T >

std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)

Return a copy of the specified function and add it to that function's module.

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))

DOTGraphTraits(bool IsSimple=false)

Definition MemProfContextDisambiguation.cpp:3382

typename GTraits::NodeRef NodeRef

Definition MemProfContextDisambiguation.cpp:3395

static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)

Definition MemProfContextDisambiguation.cpp:3450

const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType

Definition MemProfContextDisambiguation.cpp:3393

typename GTraits::ChildIteratorType ChildIteratorType

Definition MemProfContextDisambiguation.cpp:3396

static std::string getNodeAttributes(NodeRef Node, GraphType G)

Definition MemProfContextDisambiguation.cpp:3419

static bool isNodeHidden(NodeRef Node, GraphType G)

Definition MemProfContextDisambiguation.cpp:3483

static std::string getNodeLabel(NodeRef Node, GraphType G)

Definition MemProfContextDisambiguation.cpp:3398

GraphTraits< GraphType > GTraits

Definition MemProfContextDisambiguation.cpp:3394

static NodeRef getNode(const NodePtrTy &P)

Definition MemProfContextDisambiguation.cpp:3341

static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)

Definition MemProfContextDisambiguation.cpp:3361

static ChildIteratorType child_end(NodeRef N)

Definition MemProfContextDisambiguation.cpp:3374

std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy

Definition MemProfContextDisambiguation.cpp:3340

mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType

Definition MemProfContextDisambiguation.cpp:3365

const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType

Definition MemProfContextDisambiguation.cpp:3337

const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef

Definition MemProfContextDisambiguation.cpp:3338

mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator

Definition MemProfContextDisambiguation.cpp:3343

static ChildIteratorType child_begin(NodeRef N)

Definition MemProfContextDisambiguation.cpp:3370

static NodeRef getEntryNode(GraphType G)

Definition MemProfContextDisambiguation.cpp:3355

static nodes_iterator nodes_begin(GraphType G)

Definition MemProfContextDisambiguation.cpp:3347

static nodes_iterator nodes_end(GraphType G)

Definition MemProfContextDisambiguation.cpp:3351

std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy

Definition MemProfContextDisambiguation.cpp:3359

Summary of memprof metadata on allocations.

std::vector< MIBInfo > MIBs

SmallVector< unsigned > StackIdIndices

SmallVector< unsigned > Clones

DefaultDOTGraphTraits(bool simple=false)

An information struct used to provide DenseMap with the various necessary components for a given valu...

typename GraphType::UnknownGraphTypeError NodeRef

Struct that holds a reference to a particular GUID in a global value summary.

ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const

GlobalValue::GUID getGUID() const

PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType

Definition MemProfContextDisambiguation.cpp:1024

static SimpleType getSimplifiedValue(IndexCall &Val)

Definition MemProfContextDisambiguation.cpp:1025

const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType

Definition MemProfContextDisambiguation.cpp:1028

static SimpleType getSimplifiedValue(const IndexCall &Val)

Definition MemProfContextDisambiguation.cpp:1029

Define a template that can be specialized by smart pointers to reflect the fact that they are automatically dereferenced, and are not involved with the template selection process.