LLVM: lib/Transforms/Utils/CodeLayout.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

46

47#include

48#include

49

50using namespace llvm;

52

53#define DEBUG_TYPE "code-layout"

54

55namespace llvm {

58 cl::desc("Enable machine block placement based on the ext-tsp model, "

59 "optimizing I-cache utilization."));

60

62 "ext-tsp-apply-without-profile",

63 cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),

65}

66

67

68

71 cl::desc("The weight of conditional forward jumps for ExtTSP value"));

72

75 cl::desc("The weight of unconditional forward jumps for ExtTSP value"));

76

79 cl::desc("The weight of conditional backward jumps for ExtTSP value"));

80

83 cl::desc("The weight of unconditional backward jumps for ExtTSP value"));

84

87 cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));

88

91 cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));

92

95 cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));

96

99 cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

100

101

102

105 cl::desc("The maximum size of a chain to create"));

106

107

108

111 cl::desc("The maximum size of a chain to apply splitting"));

112

113

116 cl::desc("The maximum ratio between densities of two chains for merging"));

117

118

120 cl::desc("The size of the cache"));

121

123 cl::desc("The size of a line in the cache"));

124

127 cl::desc("The maximum size of a chain to create"));

128

131 cl::desc("The power exponent for the distance-based locality"));

132

135 cl::desc("The scale factor for the frequency-based locality"));

136

137namespace {

138

139

140constexpr double EPS = 1e-8;

141

142

144 double Weight) {

145 if (JumpDist > JumpMaxDist)

146 return 0;

147 double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;

148 return Weight * Prob * Count;

149}

150

151

152

155

156 if (SrcAddr + SrcSize == DstAddr) {

157 return jumpExtTSPScore(0, 1, Count,

160 }

161

162 if (SrcAddr + SrcSize < DstAddr) {

163 const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);

167 }

168

169 const uint64_t Dist = SrcAddr + SrcSize - DstAddr;

173}

174

175

176

177enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y };

178

179

180

181struct MergeGainT {

182 explicit MergeGainT() = default;

183 explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType)

184 : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}

185

186 double score() const { return Score; }

187

188 size_t mergeOffset() const { return MergeOffset; }

189

190 MergeTypeT mergeType() const { return MergeType; }

191

192 void setMergeType(MergeTypeT Ty) { MergeType = Ty; }

193

194

196 return (Other.Score > EPS && Other.Score > Score + EPS);

197 }

198

199

200 void updateIfLessThan(const MergeGainT &Other) {

201 if (*this < Other)

203 }

204

205private:

206 double Score{-1.0};

207 size_t MergeOffset{0};

208 MergeTypeT MergeType{MergeTypeT::X_Y};

209};

210

211struct JumpT;

212struct ChainT;

213struct ChainEdge;

214

215

216

217struct NodeT {

218 NodeT(const NodeT &) = delete;

219 NodeT(NodeT &&) = default;

220 NodeT &operator=(const NodeT &) = delete;

221 NodeT &operator=(NodeT &&) = default;

222

223 explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)

224 : Index(Index), Size(Size), ExecutionCount(Count) {}

225

226 bool isEntry() const { return Index == 0; }

227

228

229 bool isSuccessor(const NodeT *Other) const;

230

231

232 uint64_t outCount() const;

233

234

235 uint64_t inCount() const;

236

237

238 size_t Index{0};

239

240 size_t CurIndex{0};

241

242 uint64_t Size{0};

243

244 uint64_t ExecutionCount{0};

245

246 ChainT *CurChain{nullptr};

247

248 mutable uint64_t EstimatedAddr{0};

249

250 NodeT *ForcedSucc{nullptr};

251

252 NodeT *ForcedPred{nullptr};

253

254 std::vector<JumpT *> OutJumps;

255

256 std::vector<JumpT *> InJumps;

257};

258

259

260struct JumpT {

261 JumpT(const JumpT &) = delete;

262 JumpT(JumpT &&) = default;

263 JumpT &operator=(const JumpT &) = delete;

264 JumpT &operator=(JumpT &&) = default;

265

266 explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount)

267 : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}

268

269

270 NodeT *Source;

271

272 NodeT *Target;

273

274 uint64_t ExecutionCount{0};

275

276 bool IsConditional{false};

277

278 uint64_t Offset{0};

279};

280

281

282struct ChainT {

283 ChainT(const ChainT &) = delete;

284 ChainT(ChainT &&) = default;

285 ChainT &operator=(const ChainT &) = delete;

286 ChainT &operator=(ChainT &&) = default;

287

288 explicit ChainT(uint64_t Id, NodeT *Node)

289 : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size),

290 Nodes(1, Node) {}

291

292 size_t numBlocks() const { return Nodes.size(); }

293

294 double density() const { return ExecutionCount / Size; }

295

296 bool isEntry() const { return Nodes[0]->Index == 0; }

297

298 bool isCold() const {

299 for (NodeT *Node : Nodes) {

300 if (Node->ExecutionCount > 0)

301 return false;

302 }

303 return true;

304 }

305

306 ChainEdge *getEdge(ChainT *Other) const {

307 for (const auto &[Chain, ChainEdge] : Edges) {

308 if (Chain == Other)

309 return ChainEdge;

310 }

311 return nullptr;

312 }

313

314 void removeEdge(ChainT *Other) {

315 auto It = Edges.begin();

316 while (It != Edges.end()) {

317 if (It->first == Other) {

318 Edges.erase(It);

319 return;

320 }

321 It++;

322 }

323 }

324

326 Edges.push_back(std::make_pair(Other, Edge));

327 }

328

329 void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {

330 Nodes = std::move(MergedBlocks);

331

332 ExecutionCount += Other->ExecutionCount;

333 Size += Other->Size;

334 Id = Nodes[0]->Index;

335

336 for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {

337 Nodes[Idx]->CurChain = this;

338 Nodes[Idx]->CurIndex = Idx;

339 }

340 }

341

342 void mergeEdges(ChainT *Other);

343

344 void clear() {

345 Nodes.clear();

346 Nodes.shrink_to_fit();

347 Edges.clear();

348 Edges.shrink_to_fit();

349 }

350

351

352 uint64_t Id;

353

354 double Score{0};

355

356

357 double ExecutionCount{0};

358

359 uint64_t Size{0};

360

361 std::vector<NodeT *> Nodes;

362

363 std::vector<std::pair<ChainT *, ChainEdge *>> Edges;

364};

365

366

367

368

369struct ChainEdge {

370 ChainEdge(const ChainEdge &) = delete;

371 ChainEdge(ChainEdge &&) = default;

372 ChainEdge &operator=(const ChainEdge &) = delete;

373 ChainEdge &operator=(ChainEdge &&) = delete;

374

375 explicit ChainEdge(JumpT *Jump)

376 : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),

377 Jumps(1, Jump) {}

378

379 ChainT *srcChain() const { return SrcChain; }

380

381 ChainT *dstChain() const { return DstChain; }

382

383 bool isSelfEdge() const { return SrcChain == DstChain; }

384

385 const std::vector<JumpT *> &jumps() const { return Jumps; }

386

387 void appendJump(JumpT *Jump) { Jumps.push_back(Jump); }

388

389 void moveJumps(ChainEdge *Other) {

391 Other->Jumps.clear();

392 Other->Jumps.shrink_to_fit();

393 }

394

395 void changeEndpoint(ChainT *From, ChainT *To) {

396 if (From == SrcChain)

397 SrcChain = To;

398 if (From == DstChain)

399 DstChain = To;

400 }

401

402 bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const {

403 return Src == SrcChain ? CacheValidForward : CacheValidBackward;

404 }

405

406 MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const {

407 return Src == SrcChain ? CachedGainForward : CachedGainBackward;

408 }

409

410 void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) {

411 if (Src == SrcChain) {

412 CachedGainForward = MergeGain;

413 CacheValidForward = true;

414 } else {

415 CachedGainBackward = MergeGain;

416 CacheValidBackward = true;

417 }

418 }

419

420 void invalidateCache() {

421 CacheValidForward = false;

422 CacheValidBackward = false;

423 }

424

425 void setMergeGain(MergeGainT Gain) { CachedGain = Gain; }

426

427 MergeGainT getMergeGain() const { return CachedGain; }

428

429 double gain() const { return CachedGain.score(); }

430

431private:

432

433 ChainT *SrcChain{nullptr};

434

435 ChainT *DstChain{nullptr};

436

437 std::vector<JumpT *> Jumps;

438

439 MergeGainT CachedGain;

440

441

442

443

444

445 MergeGainT CachedGainForward;

446 MergeGainT CachedGainBackward;

447

448 bool CacheValidForward{false};

449 bool CacheValidBackward{false};

450};

451

452bool NodeT::isSuccessor(const NodeT *Other) const {

453 for (JumpT *Jump : OutJumps)

454 if (Jump->Target == Other)

455 return true;

456 return false;

457}

458

459uint64_t NodeT::outCount() const {

460 uint64_t Count = 0;

461 for (JumpT *Jump : OutJumps)

462 Count += Jump->ExecutionCount;

464}

465

466uint64_t NodeT::inCount() const {

467 uint64_t Count = 0;

468 for (JumpT *Jump : InJumps)

469 Count += Jump->ExecutionCount;

471}

472

473void ChainT::mergeEdges(ChainT *Other) {

474

475 for (const auto &[DstChain, DstEdge] : Other->Edges) {

476 ChainT *TargetChain = DstChain == Other ? this : DstChain;

477 ChainEdge *CurEdge = getEdge(TargetChain);

478 if (CurEdge == nullptr) {

479 DstEdge->changeEndpoint(Other, this);

480 this->addEdge(TargetChain, DstEdge);

481 if (DstChain != this && DstChain != Other)

482 DstChain->addEdge(this, DstEdge);

483 } else {

484 CurEdge->moveJumps(DstEdge);

485 }

486

487 if (DstChain != Other)

488 DstChain->removeEdge(Other);

489 }

490}

491

492using NodeIter = std::vector<NodeT *>::const_iterator;

493static std::vector<NodeT *> EmptyList;

494

495

496

497struct MergedNodesT {

498 MergedNodesT(NodeIter Begin1, NodeIter End1,

499 NodeIter Begin2 = EmptyList.begin(),

500 NodeIter End2 = EmptyList.end(),

501 NodeIter Begin3 = EmptyList.begin(),

502 NodeIter End3 = EmptyList.end())

503 : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),

504 End3(End3) {}

505

506 template void forEach(const F &Func) const {

507 for (auto It = Begin1; It != End1; It++)

509 for (auto It = Begin2; It != End2; It++)

511 for (auto It = Begin3; It != End3; It++)

513 }

514

515 std::vector<NodeT *> getNodes() const {

516 std::vector<NodeT *> Result;

517 Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +

518 std::distance(Begin3, End3));

523 }

524

525 const NodeT *getFirstNode() const { return *Begin1; }

526

527private:

528 NodeIter Begin1;

529 NodeIter End1;

530 NodeIter Begin2;

531 NodeIter End2;

532 NodeIter Begin3;

533 NodeIter End3;

534};

535

536

537struct MergedJumpsT {

538 MergedJumpsT(const std::vector<JumpT *> *Jumps1,

539 const std::vector<JumpT *> *Jumps2 = nullptr) {

540 assert(!Jumps1->empty() && "cannot merge empty jump list");

541 JumpArray[0] = Jumps1;

542 JumpArray[1] = Jumps2;

543 }

544

545 template void forEach(const F &Func) const {

546 for (auto Jumps : JumpArray)

547 if (Jumps != nullptr)

548 for (JumpT *Jump : *Jumps)

550 }

551

552private:

553 std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};

554};

555

556

557

558

559

560

561MergedNodesT mergeNodes(const std::vector<NodeT *> &X,

562 const std::vector<NodeT *> &Y, size_t MergeOffset,

563 MergeTypeT MergeType) {

564

565 NodeIter BeginX1 = X.begin();

566 NodeIter EndX1 = X.begin() + MergeOffset;

567 NodeIter BeginX2 = X.begin() + MergeOffset;

568 NodeIter EndX2 = X.end();

569 NodeIter BeginY = Y.begin();

570 NodeIter EndY = Y.end();

571

572

573 switch (MergeType) {

574 case MergeTypeT::X_Y:

575 return MergedNodesT(BeginX1, EndX2, BeginY, EndY);

576 case MergeTypeT::Y_X:

577 return MergedNodesT(BeginY, EndY, BeginX1, EndX2);

578 case MergeTypeT::X1_Y_X2:

579 return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);

580 case MergeTypeT::Y_X2_X1:

581 return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);

582 case MergeTypeT::X2_X1_Y:

583 return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);

584 }

586}

587

588

589class ExtTSPImpl {

590public:

591 ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,

593 : NumNodes(NodeSizes.size()) {

594 initialize(NodeSizes, NodeCounts, EdgeCounts);

595 }

596

597

598 std::vector<uint64_t> run() {

599

600 mergeForcedPairs();

601

602

603 mergeChainPairs();

604

605

606 mergeColdChains();

607

608

609 return concatChains();

610 }

611

612private:

613

614 void initialize(const ArrayRef<uint64_t> &NodeSizes,

615 const ArrayRef<uint64_t> &NodeCounts,

617

618 AllNodes.reserve(NumNodes);

619 for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {

620 uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);

621 uint64_t ExecutionCount = NodeCounts[Idx];

622

623 if (Idx == 0 && ExecutionCount == 0)

624 ExecutionCount = 1;

625 AllNodes.emplace_back(Idx, Size, ExecutionCount);

626 }

627

628

629 SuccNodes.resize(NumNodes);

630 PredNodes.resize(NumNodes);

631 std::vector<uint64_t> OutDegree(NumNodes, 0);

632 AllJumps.reserve(EdgeCounts.size());

633 for (auto Edge : EdgeCounts) {

634 ++OutDegree[Edge.src];

635

637 continue;

638

639 SuccNodes[Edge.src].push_back(Edge.dst);

640 PredNodes[Edge.dst].push_back(Edge.src);

641 if (Edge.count > 0) {

642 NodeT &PredNode = AllNodes[Edge.src];

643 NodeT &SuccNode = AllNodes[Edge.dst];

644 AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);

645 SuccNode.InJumps.push_back(&AllJumps.back());

646 PredNode.OutJumps.push_back(&AllJumps.back());

647

648 PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);

649 SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);

650 }

651 }

652 for (JumpT &Jump : AllJumps) {

653 assert(OutDegree[Jump.Source->Index] > 0 &&

654 "incorrectly computed out-degree of the block");

655 Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;

656 }

657

658

659 AllChains.reserve(NumNodes);

660 HotChains.reserve(NumNodes);

661 for (NodeT &Node : AllNodes) {

662

663 AllChains.emplace_back(Node.Index, &Node);

664 Node.CurChain = &AllChains.back();

665 if (Node.ExecutionCount > 0)

666 HotChains.push_back(&AllChains.back());

667 }

668

669

670 AllEdges.reserve(AllJumps.size());

671 for (NodeT &PredNode : AllNodes) {

672 for (JumpT *Jump : PredNode.OutJumps) {

673 assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");

674 NodeT *SuccNode = Jump->Target;

675 ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);

676

677 if (CurEdge != nullptr) {

678 assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);

679 CurEdge->appendJump(Jump);

680 continue;

681 }

682

683 AllEdges.emplace_back(Jump);

684 PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());

685 SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());

686 }

687 }

688 }

689

690

691

692

693

694 void mergeForcedPairs() {

695

696 for (NodeT &Node : AllNodes) {

697 if (SuccNodes[Node.Index].size() == 1 &&

698 PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&

699 SuccNodes[Node.Index][0] != 0) {

700 size_t SuccIndex = SuccNodes[Node.Index][0];

701 Node.ForcedSucc = &AllNodes[SuccIndex];

702 AllNodes[SuccIndex].ForcedPred = &Node;

703 }

704 }

705

706

707

708

709

710

711

712 for (NodeT &Node : AllNodes) {

713 if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr)

714 continue;

715

716 NodeT *SuccNode = Node.ForcedSucc;

717 while (SuccNode != nullptr && SuccNode != &Node) {

718 SuccNode = SuccNode->ForcedSucc;

719 }

720 if (SuccNode == nullptr)

721 continue;

722

723 AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;

724 Node.ForcedPred = nullptr;

725 }

726

727

728 for (NodeT &Node : AllNodes) {

729 if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {

730 const NodeT *CurBlock = &Node;

731 while (CurBlock->ForcedSucc != nullptr) {

732 const NodeT *NextBlock = CurBlock->ForcedSucc;

733 mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y);

734 CurBlock = NextBlock;

735 }

736 }

737 }

738 }

739

740

741 void mergeChainPairs() {

742

743 auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,

744 const ChainT *A2, const ChainT *B2) {

745 return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);

746 };

747

748 while (HotChains.size() > 1) {

749 ChainT *BestChainPred = nullptr;

750 ChainT *BestChainSucc = nullptr;

751 MergeGainT BestGain;

752

753 for (ChainT *ChainPred : HotChains) {

754

755 for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {

756

757 if (Edge->isSelfEdge())

758 continue;

759

760

761 if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)

762 continue;

763

764

765

766

767 const double ChainPredDensity = ChainPred->density();

768 const double ChainSuccDensity = ChainSucc->density();

769 assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&

770 "incorrectly computed chain densities");

771 auto [MinDensity, MaxDensity] =

772 std::minmax(ChainPredDensity, ChainSuccDensity);

773 const double Ratio = MaxDensity / MinDensity;

775 continue;

776

777

778 MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);

779 if (CurGain.score() <= EPS)

780 continue;

781

782 if (BestGain < CurGain ||

783 (std::abs(CurGain.score() - BestGain.score()) < EPS &&

784 compareChainPairs(ChainPred, ChainSucc, BestChainPred,

785 BestChainSucc))) {

786 BestGain = CurGain;

787 BestChainPred = ChainPred;

788 BestChainSucc = ChainSucc;

789 }

790 }

791 }

792

793

794 if (BestGain.score() <= EPS)

795 break;

796

797

798 mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),

799 BestGain.mergeType());

800 }

801 }

802

803

804

805

806 void mergeColdChains() {

807 for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {

808

809

810 size_t NumSuccs = SuccNodes[SrcBB].size();

811 for (size_t Idx = 0; Idx < NumSuccs; Idx++) {

812 size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];

813 ChainT *SrcChain = AllNodes[SrcBB].CurChain;

814 ChainT *DstChain = AllNodes[DstBB].CurChain;

815 if (SrcChain != DstChain && !DstChain->isEntry() &&

816 SrcChain->Nodes.back()->Index == SrcBB &&

817 DstChain->Nodes.front()->Index == DstBB &&

818 SrcChain->isCold() == DstChain->isCold()) {

819 mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y);

820 }

821 }

822 }

823 }

824

825

826 double extTSPScore(const MergedNodesT &Nodes,

827 const MergedJumpsT &Jumps) const {

828 uint64_t CurAddr = 0;

829 Nodes.forEach([&](const NodeT *Node) {

830 Node->EstimatedAddr = CurAddr;

831 CurAddr += Node->Size;

832 });

833

834 double Score = 0;

835 Jumps.forEach([&](const JumpT *Jump) {

836 const NodeT *SrcBlock = Jump->Source;

837 const NodeT *DstBlock = Jump->Target;

838 Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,

839 DstBlock->EstimatedAddr, Jump->ExecutionCount,

840 Jump->IsConditional);

841 });

842 return Score;

843 }

844

845

846

847

848

849

850

851 MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,

852 ChainEdge *Edge) const {

853 if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))

854 return Edge->getCachedMergeGain(ChainPred, ChainSucc);

855

856 assert(Edge->jumps().empty() && "trying to merge chains w/o jumps");

857

858 ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);

859 MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);

860

861

862 MergeGainT Gain = MergeGainT();

863

864

865

866 auto tryChainMerging = [&](size_t Offset,

867 const std::vector &MergeTypes) {

868

869 if (Offset == 0 || Offset == ChainPred->Nodes.size())

870 return;

871

872 NodeT *Node = ChainPred->Nodes[Offset - 1];

873 if (Node->ForcedSucc != nullptr)

874 return;

875

876

877 for (const MergeTypeT &MergeType : MergeTypes) {

878 Gain.updateIfLessThan(

879 computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));

880 }

881 };

882

883

884 Gain.updateIfLessThan(

885 computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));

886

887

888 for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {

889 const NodeT *SrcBlock = Jump->Source;

890 if (SrcBlock->CurChain != ChainPred)

891 continue;

892 size_t Offset = SrcBlock->CurIndex + 1;

893 tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});

894 }

895

896

897 for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {

898 const NodeT *DstBlock = Jump->Target;

899 if (DstBlock->CurChain != ChainPred)

900 continue;

901 size_t Offset = DstBlock->CurIndex;

902 tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});

903 }

904

905

907 for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {

908

909

910

911 const NodeT *BB = ChainPred->Nodes[Offset - 1];

912 const NodeT *BB2 = ChainPred->Nodes[Offset];

913 if (BB->isSuccessor(BB2))

914 continue;

915

916

917

918 tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,

919 MergeTypeT::X2_X1_Y});

920 }

921 }

922

923 Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);

924 return Gain;

925 }

926

927

928

929

930

931 MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,

932 const MergedJumpsT &Jumps, size_t MergeOffset,

933 MergeTypeT MergeType) const {

934 MergedNodesT MergedNodes =

935 mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);

936

937

938 if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&

939 !MergedNodes.getFirstNode()->isEntry())

940 return MergeGainT();

941

942

943 double NewScore = extTSPScore(MergedNodes, Jumps);

944 double CurScore = ChainPred->Score;

945 return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);

946 }

947

948

949

950 void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,

951 MergeTypeT MergeType) {

952 assert(Into != From && "a chain cannot be merged with itself");

953

954

955 MergedNodesT MergedNodes =

956 mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);

957 Into->merge(From, MergedNodes.getNodes());

958

959

960 Into->mergeEdges(From);

961 From->clear();

962

963

964 ChainEdge *SelfEdge = Into->getEdge(Into);

965 if (SelfEdge != nullptr) {

966 MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());

967 MergedJumpsT MergedJumps(&SelfEdge->jumps());

968 Into->Score = extTSPScore(MergedNodes, MergedJumps);

969 }

970

971

973

974

975 for (auto EdgeIt : Into->Edges)

976 EdgeIt.second->invalidateCache();

977 }

978

979

980 std::vector<uint64_t> concatChains() {

981

982 std::vector<const ChainT *> SortedChains;

983 for (ChainT &Chain : AllChains) {

984 if (!Chain.Nodes.empty())

985 SortedChains.push_back(&Chain);

986 }

987

988

989 std::sort(SortedChains.begin(), SortedChains.end(),

990 [&](const ChainT *L, const ChainT *R) {

991

992 if (L->isEntry() != R->isEntry())

993 return L->isEntry();

994

995

996 return std::make_tuple(-L->density(), L->Id) <

997 std::make_tuple(-R->density(), R->Id);

998 });

999

1000

1001 std::vector<uint64_t> Order;

1002 Order.reserve(NumNodes);

1003 for (const ChainT *Chain : SortedChains)

1004 for (NodeT *Node : Chain->Nodes)

1005 Order.push_back(Node->Index);

1006 return Order;

1007 }

1008

1009private:

1010

1011 const size_t NumNodes;

1012

1013

1014 std::vector<std::vector<uint64_t>> SuccNodes;

1015

1016

1017 std::vector<std::vector<uint64_t>> PredNodes;

1018

1019

1020 std::vector AllNodes;

1021

1022

1023 std::vector AllJumps;

1024

1025

1026 std::vector AllChains;

1027

1028

1029 std::vector AllEdges;

1030

1031

1032 std::vector<ChainT *> HotChains;

1033};

1034

1035

1036

1037class CDSortImpl {

1038public:

1039 CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,

1041 ArrayRef<uint64_t> EdgeOffsets)

1042 : Config(Config), NumNodes(NodeSizes.size()) {

1043 initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);

1044 }

1045

1046

1047 std::vector<uint64_t> run() {

1048

1049 mergeChainPairs();

1050

1051

1052 return concatChains();

1053 }

1054

1055private:

1056

1057 void initialize(const ArrayRef<uint64_t> &NodeSizes,

1058 const ArrayRef<uint64_t> &NodeCounts,

1060 const ArrayRef<uint64_t> &EdgeOffsets) {

1061

1062 AllNodes.reserve(NumNodes);

1063 for (uint64_t Node = 0; Node < NumNodes; Node++) {

1064 uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);

1065 uint64_t ExecutionCount = NodeCounts[Node];

1066 AllNodes.emplace_back(Node, Size, ExecutionCount);

1067 TotalSamples += ExecutionCount;

1068 if (ExecutionCount > 0)

1069 TotalSize += Size;

1070 }

1071

1072

1073 SuccNodes.resize(NumNodes);

1074 PredNodes.resize(NumNodes);

1075 AllJumps.reserve(EdgeCounts.size());

1076 for (size_t I = 0; I < EdgeCounts.size(); I++) {

1077 auto [Pred, Succ, Count] = EdgeCounts[I];

1078

1079 if (Pred == Succ)

1080 continue;

1081

1082 SuccNodes[Pred].push_back(Succ);

1083 PredNodes[Succ].push_back(Pred);

1084 if (Count > 0) {

1085 NodeT &PredNode = AllNodes[Pred];

1086 NodeT &SuccNode = AllNodes[Succ];

1087 AllJumps.emplace_back(&PredNode, &SuccNode, Count);

1088 AllJumps.back().Offset = EdgeOffsets[I];

1089 SuccNode.InJumps.push_back(&AllJumps.back());

1090 PredNode.OutJumps.push_back(&AllJumps.back());

1091

1092 PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);

1093 SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);

1094 }

1095 }

1096

1097

1098 AllChains.reserve(NumNodes);

1099 for (NodeT &Node : AllNodes) {

1100

1101 Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());

1102 Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());

1103

1104 AllChains.emplace_back(Node.Index, &Node);

1105 Node.CurChain = &AllChains.back();

1106 }

1107

1108

1109 AllEdges.reserve(AllJumps.size());

1110 for (NodeT &PredNode : AllNodes) {

1111 for (JumpT *Jump : PredNode.OutJumps) {

1112 NodeT *SuccNode = Jump->Target;

1113 ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);

1114

1115 if (CurEdge != nullptr) {

1116 assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);

1117 CurEdge->appendJump(Jump);

1118 continue;

1119 }

1120

1121 AllEdges.emplace_back(Jump);

1122 PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());

1123 SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());

1124 }

1125 }

1126 }

1127

1128

1129 void mergeChainPairs() {

1130

1131 auto GainComparator = [](ChainEdge *L, ChainEdge *R) {

1132 return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <

1133 std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);

1134 };

1135 std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);

1136

1137

1138 [[maybe_unused]] size_t NumActiveChains = 0;

1139 for (NodeT &Node : AllNodes) {

1140 if (Node.ExecutionCount == 0)

1141 continue;

1142 ++NumActiveChains;

1143 for (const auto &[_, Edge] : Node.CurChain->Edges) {

1144

1145 if (Edge->isSelfEdge())

1146 continue;

1147

1148 if (Edge->gain() != -1.0)

1149 continue;

1150

1151

1152 MergeGainT Gain = getBestMergeGain(Edge);

1153 Edge->setMergeGain(Gain);

1154

1155 if (Edge->gain() > EPS)

1157 }

1158 }

1159

1160

1161 while (Queue.empty()) {

1162

1163 ChainEdge *BestEdge = *Queue.begin();

1165 ChainT *BestSrcChain = BestEdge->srcChain();

1166 ChainT *BestDstChain = BestEdge->dstChain();

1167

1168

1169 for (const auto &[_, ChainEdge] : BestSrcChain->Edges)

1170 Queue.erase(ChainEdge);

1171 for (const auto &[_, ChainEdge] : BestDstChain->Edges)

1172 Queue.erase(ChainEdge);

1173

1174

1175 MergeGainT BestGain = BestEdge->getMergeGain();

1176 mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),

1177 BestGain.mergeType());

1178 --NumActiveChains;

1179

1180

1181 for (const auto &[_, Edge] : BestSrcChain->Edges) {

1182

1183 if (Edge->isSelfEdge())

1184 continue;

1185 if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >

1187 continue;

1188

1189

1190 MergeGainT Gain = getBestMergeGain(Edge);

1191 Edge->setMergeGain(Gain);

1192

1193 if (Edge->gain() > EPS)

1195 }

1196 }

1197

1198 LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"

1199 << " of chains from " << NumNodes << " to "

1200 << NumActiveChains << "\n");

1201 }

1202

1203

1204

1205

1206

1207

1208

1209 MergeGainT getBestMergeGain(ChainEdge *Edge) const {

1210 assert(Edge->jumps().empty() && "trying to merge chains w/o jumps");

1211

1212 MergedJumpsT Jumps(&Edge->jumps());

1213 ChainT *SrcChain = Edge->srcChain();

1214 ChainT *DstChain = Edge->dstChain();

1215

1216

1217 MergeGainT Gain = MergeGainT();

1218

1219

1220

1221 auto tryChainMerging = [&](const std::vector &MergeTypes) {

1222

1223

1224 for (const MergeTypeT &MergeType : MergeTypes) {

1225 MergeGainT NewGain =

1226 computeMergeGain(SrcChain, DstChain, Jumps, MergeType);

1227

1228

1229

1230 if (std::abs(Gain.score() - NewGain.score()) < EPS) {

1231 if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||

1232 (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {

1233 Gain = NewGain;

1234 }

1235 } else if (NewGain.score() > Gain.score() + EPS) {

1236 Gain = NewGain;

1237 }

1238 }

1239 };

1240

1241

1242 tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});

1243

1244 return Gain;

1245 }

1246

1247

1248

1249

1250 MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,

1251 const MergedJumpsT &Jumps,

1252 MergeTypeT MergeType) const {

1253

1254 double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);

1255

1256

1257 size_t MergeOffset = 0;

1258 auto MergedBlocks =

1259 mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);

1260 double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);

1261

1262 double GainScore = DistGain + Config.FrequencyScale * FreqGain;

1263

1264 if (GainScore >= 0.0)

1265 GainScore /= std::min(ChainPred->Size, ChainSucc->Size);

1266

1267 return MergeGainT(GainScore, MergeOffset, MergeType);

1268 }

1269

1270

1271 double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {

1272 auto missProbability = [&](double ChainDensity) {

1273 double PageSamples = ChainDensity * Config.CacheSize;

1274 if (PageSamples >= TotalSamples)

1275 return 0.0;

1276 double P = PageSamples / TotalSamples;

1277 return pow(1.0 - P, static_cast<double>(Config.CacheEntries));

1278 };

1279

1280

1281 double CurScore =

1282 ChainPred->ExecutionCount * missProbability(ChainPred->density()) +

1283 ChainSucc->ExecutionCount * missProbability(ChainSucc->density());

1284

1285

1286 double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;

1287 double MergedSize = ChainPred->Size + ChainSucc->Size;

1288 double MergedDensity = MergedCounts / MergedSize;

1289 double NewScore = MergedCounts * missProbability(MergedDensity);

1290

1291 return CurScore - NewScore;

1292 }

1293

1294

1295 double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {

1296 uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;

1297 double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);

1299 }

1300

1301

1302 double distBasedLocalityGain(const MergedNodesT &Nodes,

1303 const MergedJumpsT &Jumps) const {

1304 uint64_t CurAddr = 0;

1305 Nodes.forEach([&](const NodeT *Node) {

1306 Node->EstimatedAddr = CurAddr;

1307 CurAddr += Node->Size;

1308 });

1309

1310 double CurScore = 0;

1311 double NewScore = 0;

1312 Jumps.forEach([&](const JumpT *Jump) {

1313 uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;

1314 uint64_t DstAddr = Jump->Target->EstimatedAddr;

1315 NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);

1316 CurScore += distScore(0, TotalSize, Jump->ExecutionCount);

1317 });

1318 return NewScore - CurScore;

1319 }

1320

1321

1322

1323 void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,

1324 MergeTypeT MergeType) {

1325 assert(Into != From && "a chain cannot be merged with itself");

1326

1327

1328 MergedNodesT MergedNodes =

1329 mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);

1330 Into->merge(From, MergedNodes.getNodes());

1331

1332

1333 Into->mergeEdges(From);

1334 From->clear();

1335 }

1336

1337

1338 std::vector<uint64_t> concatChains() {

1339

1340 std::vector<const ChainT *> SortedChains;

1341 DenseMap<const ChainT *, double> ChainDensity;

1342 for (ChainT &Chain : AllChains) {

1343 if (!Chain.Nodes.empty()) {

1344 SortedChains.push_back(&Chain);

1345

1346 double Size = 0;

1347 double ExecutionCount = 0;

1348 for (NodeT *Node : Chain.Nodes) {

1349 Size += static_cast<double>(Node->Size);

1350 ExecutionCount += static_cast<double>(Node->ExecutionCount);

1351 }

1352 assert(Size > 0 && "a chain of zero size");

1353 ChainDensity[&Chain] = ExecutionCount / Size;

1354 }

1355 }

1356

1357

1358 std::sort(SortedChains.begin(), SortedChains.end(),

1359 [&](const ChainT *L, const ChainT *R) {

1360 const double DL = ChainDensity[L];

1361 const double DR = ChainDensity[R];

1362

1363 return std::make_tuple(-DL, L->Id) <

1364 std::make_tuple(-DR, R->Id);

1365 });

1366

1367

1368 std::vector<uint64_t> Order;

1369 Order.reserve(NumNodes);

1370 for (const ChainT *Chain : SortedChains)

1371 for (NodeT *Node : Chain->Nodes)

1372 Order.push_back(Node->Index);

1373 return Order;

1374 }

1375

1376private:

1377

1378 const CDSortConfig Config;

1379

1380

1381 const size_t NumNodes;

1382

1383

1384 std::vector<std::vector<uint64_t>> SuccNodes;

1385

1386

1387 std::vector<std::vector<uint64_t>> PredNodes;

1388

1389

1390 std::vector AllNodes;

1391

1392

1393 std::vector AllJumps;

1394

1395

1396 std::vector AllChains;

1397

1398

1399 std::vector AllEdges;

1400

1401

1402 uint64_t TotalSamples{0};

1403

1404

1405 uint64_t TotalSize{0};

1406};

1407

1408}

1409

1410std::vector<uint64_t>

1414

1415 assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");

1416 assert(NodeSizes.size() > 2 && "Incorrect input");

1417

1418

1419 ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);

1420 std::vector<uint64_t> Result = Alg.run();

1421

1422

1423 assert(Result.front() == 0 && "Original entry point is not preserved");

1424 assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");

1425 return Result;

1426}

1427

1431

1433 for (uint64_t Idx = 1; Idx < Order.size(); Idx++)

1434 Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];

1436 for (auto &Edge : EdgeCounts)

1437 ++OutDegree[Edge.src];

1438

1439

1440 double Score = 0;

1441 for (auto &Edge : EdgeCounts) {

1442 bool IsConditional = OutDegree[Edge.src] > 1;

1443 Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],

1444 Edge.count, IsConditional);

1445 }

1446 return Score;

1447}

1448

1452 for (uint64_t Idx = 0; Idx < NodeSizes.size(); Idx++)

1453 Order[Idx] = Idx;

1455}

1456

1461

1462 assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");

1463

1464

1465 CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);

1466 std::vector<uint64_t> Result = Alg.run();

1467 assert(Result.size() == FuncSizes.size() && "Incorrect size of layout");

1468 return Result;

1469}

1470

1475

1478 if (CacheSize.getNumOccurrences() > 0)

1487 CallOffsets);

1488}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static cl::opt< unsigned > CacheSize("cdsort-cache-size", cl::ReallyHidden, cl::desc("The size of a line in the cache"))

static cl::opt< unsigned > ForwardDistance("ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024), cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"))

static cl::opt< unsigned > BackwardDistance("ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640), cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"))

static cl::opt< double > BackwardWeightCond("ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of conditional backward jumps for ExtTSP value"))

static cl::opt< double > FrequencyScale("cdsort-frequency-scale", cl::ReallyHidden, cl::desc("The scale factor for the frequency-based locality"))

static cl::opt< double > ForwardWeightUncond("ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of unconditional forward jumps for ExtTSP value"))

static cl::opt< double > DistancePower("cdsort-distance-power", cl::ReallyHidden, cl::desc("The power exponent for the distance-based locality"))

static cl::opt< unsigned > MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512), cl::desc("The maximum size of a chain to create"))

static cl::opt< unsigned > ChainSplitThreshold("ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), cl::desc("The maximum size of a chain to apply splitting"))

static cl::opt< double > FallthroughWeightUncond("ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05), cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"))

static cl::opt< double > BackwardWeightUncond("ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of unconditional backward jumps for ExtTSP value"))

static cl::opt< unsigned > CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden, cl::desc("The maximum size of a chain to create"))

static cl::opt< double > ForwardWeightCond("ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of conditional forward jumps for ExtTSP value"))

static cl::opt< unsigned > CacheEntries("cdsort-cache-entries", cl::ReallyHidden, cl::desc("The size of the cache"))

static cl::opt< double > MaxMergeDensityRatio("ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100), cl::desc("The maximum ratio between densities of two chains for merging"))

static cl::opt< double > FallthroughWeightCond("ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0), cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"))

Declares methods and data structures for code layout algorithms.

static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)

static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)

std::pair< BasicBlock *, BasicBlock * > Edge

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)

Initialize the set of available library functions based on the specified target triple.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

LLVM_ABI APInt pow(const APInt &X, int64_t N)

Compute X^N for N>=0.

initializer< Ty > init(const Ty &Val)

LLVM_ABI std::vector< uint64_t > computeCacheDirectedLayout(ArrayRef< uint64_t > FuncSizes, ArrayRef< uint64_t > FuncCounts, ArrayRef< EdgeCount > CallCounts, ArrayRef< uint64_t > CallOffsets)

Apply a Cache-Directed Sort for functions represented by a call graph.

Definition CodeLayout.cpp:1471

LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)

Estimate the "quality" of a given node order in CFG.

Definition CodeLayout.cpp:1428

LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)

Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...

Definition CodeLayout.cpp:1411

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

NodeAddr< NodeBase * > Node

NodeAddr< FuncNode * > Func

This is an optimization pass for GlobalISel generic memory operations.

bool operator<(int64_t V1, const APSInt &V2)

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

cl::opt< bool > ApplyExtTspWithoutProfile

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

void erase(Container &C, ValueType V)

Wrapper function to remove a value from a container:

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

ArrayRef(const T &OneElt) -> ArrayRef< T >

cl::opt< bool > EnableExtTspBlockPlacement

Algorithm-specific params for Cache-Directed Sort.

unsigned MaxChainSize

The maximum size of a chain to create.

unsigned CacheSize

The size of a line in the cache.

double DistancePower

The power exponent for the distance-based locality.

double FrequencyScale

The scale factor for the frequency-based locality.

unsigned CacheEntries

The size of the cache.