LLVM: lib/Transforms/Utils/SampleProfileInference.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

20#include

21#include

22#include

23#include <unordered_set>

24

25using namespace llvm;

26#define DEBUG_TYPE "sample-profile-inference"

27

28namespace {

29

30static cl::opt SampleProfileEvenFlowDistribution(

32 cl::desc("Try to evenly distribute flow when there are multiple equally "

33 "likely options."));

34

35static cl::opt SampleProfileRebalanceUnknown(

37 cl::desc("Evenly re-distribute flow among unknown subgraphs."));

38

41 cl::desc("Join isolated components having positive flow."));

42

45 cl::desc("The cost of increasing a block's count by one."));

46

49 cl::desc("The cost of decreasing a block's count by one."));

50

52 "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,

53 cl::desc("The cost of increasing the entry block's count by one."));

54

56 "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,

57 cl::desc("The cost of decreasing the entry block's count by one."));

58

61 cl::desc("The cost of increasing a count of zero-weight block by one."));

62

64 "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,

65 cl::desc("The cost of increasing an unknown block's count by one."));

66

67

68

69

70static constexpr int64_t INF = ((int64_t)1) << 50;

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86class MinCostMaxFlow {

87public:

88 MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}

89

90

92 Source = SourceNode;

94

95 Nodes = std::vector(NodeCount);

96 Edges = std::vector<std::vector>(NodeCount, std::vector());

98 AugmentingEdges =

99 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());

100 }

101

102

103 int64_t run() {

104 LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

105

106

107

108 size_t AugmentationIters = applyFlowAugmentation();

109

110

111 int64_t TotalCost = 0;

112 int64_t TotalFlow = 0;

113 for (uint64_t Src = 0; Src < Nodes.size(); Src++) {

114 for (auto &Edge : Edges[Src]) {

115 if (Edge.Flow > 0) {

116 TotalCost += Edge.Cost * Edge.Flow;

117 if (Src == Source)

118 TotalFlow += Edge.Flow;

119 }

120 }

121 }

122 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters

123 << " iterations with " << TotalFlow << " total flow"

124 << " of " << TotalCost << " cost\n");

125 (void)TotalFlow;

126 (void)AugmentationIters;

127 return TotalCost;

128 }

129

130

131

132

134 assert(Capacity > 0 && "adding an edge of zero capacity");

135 assert(Src != Dst && "loop edge are not supported");

136

137 Edge SrcEdge;

138 SrcEdge.Dst = Dst;

139 SrcEdge.Cost = Cost;

140 SrcEdge.Capacity = Capacity;

141 SrcEdge.Flow = 0;

142 SrcEdge.RevEdgeIndex = Edges[Dst].size();

143

144 Edge DstEdge;

145 DstEdge.Dst = Src;

146 DstEdge.Cost = -Cost;

147 DstEdge.Capacity = 0;

148 DstEdge.Flow = 0;

149 DstEdge.RevEdgeIndex = Edges[Src].size();

150

151 Edges[Src].push_back(SrcEdge);

152 Edges[Dst].push_back(DstEdge);

153 }

154

155

158 }

159

160

161

162 std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {

163 std::vector<std::pair<uint64_t, int64_t>> Flow;

164 for (const auto &Edge : Edges[Src]) {

165 if (Edge.Flow > 0)

166 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));

167 }

169 }

170

171

173 int64_t Flow = 0;

174 for (const auto &Edge : Edges[Src]) {

175 if (Edge.Dst == Dst) {

176 Flow += Edge.Flow;

177 }

178 }

180 }

181

182private:

183

184

185 size_t applyFlowAugmentation() {

186 size_t AugmentationIters = 0;

187 while (findAugmentingPath()) {

188 uint64_t PathCapacity = computeAugmentingPathCapacity();

189 while (PathCapacity > 0) {

190 bool Progress = false;

192

193 identifyShortestEdges(PathCapacity);

194

195

196 auto AugmentingOrder = findAugmentingDAG();

197

198

199 Progress = augmentFlowAlongDAG(AugmentingOrder);

200 PathCapacity = computeAugmentingPathCapacity();

201 }

202

203 if (!Progress) {

204 augmentFlowAlongPath(PathCapacity);

205 PathCapacity = 0;

206 }

207

208 AugmentationIters++;

209 }

210 }

211 return AugmentationIters;

212 }

213

214

215

216 uint64_t computeAugmentingPathCapacity() {

219 while (Now != Source) {

220 uint64_t Pred = Nodes[Now].ParentNode;

221 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

222

223 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");

225 PathCapacity = std::min(PathCapacity, EdgeCapacity);

226

227 Now = Pred;

228 }

229 return PathCapacity;

230 }

231

232

233 bool findAugmentingPath() {

234

235 for (auto &Node : Nodes) {

239 Node.Taken = false;

240 }

241

242 std::queue<uint64_t> Queue;

243 Queue.push(Source);

244 Nodes[Source].Distance = 0;

245 Nodes[Source].Taken = true;

246 while (!Queue.empty()) {

247 uint64_t Src = Queue.front();

248 Queue.pop();

249 Nodes[Src].Taken = false;

250

251

252

253

254

255

256

257

258

259

260

261

262

263

265 break;

266 if (Nodes[Src].Distance > Nodes[Target].Distance)

267 continue;

268

269

270 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {

271 auto &Edge = Edges[Src][EdgeIdx];

272 if (Edge.Flow < Edge.Capacity) {

274 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;

275 if (Nodes[Dst].Distance > NewDistance) {

276

277 Nodes[Dst].Distance = NewDistance;

278 Nodes[Dst].ParentNode = Src;

279 Nodes[Dst].ParentEdgeIndex = EdgeIdx;

280

281 if (!Nodes[Dst].Taken) {

282 Queue.push(Dst);

283 Nodes[Dst].Taken = true;

284 }

285 }

286 }

287 }

288 }

289

290 return Nodes[Target].Distance != INF;

291 }

292

293

294 void augmentFlowAlongPath(uint64_t PathCapacity) {

295 assert(PathCapacity > 0 && "found an incorrect augmenting path");

297 while (Now != Source) {

298 uint64_t Pred = Nodes[Now].ParentNode;

299 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

300 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

301

302 Edge.Flow += PathCapacity;

303 RevEdge.Flow -= PathCapacity;

304

305 Now = Pred;

306 }

307 }

308

309

310

311

312

313

314

315

316 std::vector<uint64_t> findAugmentingDAG() {

317

318

319

320

321

322 typedef std::pair<uint64_t, uint64_t> StackItemType;

323 std::stack Stack;

324 std::vector<uint64_t> AugmentingOrder;

325

326

327 for (auto &Node : Nodes) {

328 Node.Discovery = 0;

329 Node.Finish = 0;

330 Node.NumCalls = 0;

331 Node.Taken = false;

332 }

334

335

336 Nodes[Target].Taken = true;

337

338

339 Stack.emplace(Source, 0);

340 Nodes[Source].Discovery = ++Time;

341 while (!Stack.empty()) {

342 auto NodeIdx = Stack.top().first;

343 auto EdgeIdx = Stack.top().second;

344

345

346 if (EdgeIdx < Edges[NodeIdx].size()) {

347 auto &Edge = Edges[NodeIdx][EdgeIdx];

348 auto &Dst = Nodes[Edge.Dst];

349 Stack.top().second++;

350

351 if (Edge.OnShortestPath) {

352

353 if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {

354 Dst.Discovery = ++Time;

355 Stack.emplace(Edge.Dst, 0);

356 Dst.NumCalls++;

357 } else if (Dst.Taken && Dst.Finish != 0) {

358

359 Nodes[NodeIdx].Taken = true;

360 }

361 }

362 } else {

363

364 Stack.pop();

365

366 if (!Nodes[NodeIdx].Taken) {

367 Nodes[NodeIdx].Discovery = 0;

368 } else {

369

370

371 Nodes[NodeIdx].Finish = ++Time;

372

373 if (NodeIdx != Source) {

374 assert(!Stack.empty() && "empty stack while running dfs");

375 Nodes[Stack.top().first].Taken = true;

376 }

377 AugmentingOrder.push_back(NodeIdx);

378 }

379 }

380 }

381

382 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

383

384

385 for (size_t Src : AugmentingOrder) {

386 AugmentingEdges[Src].clear();

387 for (auto &Edge : Edges[Src]) {

389 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&

390 Nodes[Dst].Finish < Nodes[Src].Finish) {

391 AugmentingEdges[Src].push_back(&Edge);

392 }

393 }

394 assert((Src == Target || !AugmentingEdges[Src].empty()) &&

395 "incorrectly constructed augmenting edges");

396 }

397

398 return AugmentingOrder;

399 }

400

401

402

403

404

405 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {

406

407 for (uint64_t Src : AugmentingOrder) {

408 Nodes[Src].FracFlow = 0;

409 Nodes[Src].IntFlow = 0;

410 for (auto &Edge : AugmentingEdges[Src]) {

411 Edge->AugmentedFlow = 0;

412 }

413 }

414

415

417 Nodes[Source].FracFlow = 1.0;

418 for (uint64_t Src : AugmentingOrder) {

419 assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&

420 "incorrectly computed fractional flow");

421

422 uint64_t Degree = AugmentingEdges[Src].size();

423 for (auto &Edge : AugmentingEdges[Src]) {

424 double EdgeFlow = Nodes[Src].FracFlow / Degree;

425 Nodes[Edge->Dst].FracFlow += EdgeFlow;

426 if (Edge->Capacity == INF)

427 continue;

428 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;

429 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);

430 }

431 }

432

433 if (MaxFlowAmount == 0)

434 return false;

435

436

437 Nodes[Source].IntFlow = MaxFlowAmount;

438 for (uint64_t Src : AugmentingOrder) {

440 break;

441

442

443 uint64_t Degree = AugmentingEdges[Src].size();

444

445 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;

446 for (auto &Edge : AugmentingEdges[Src]) {

448 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);

449 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));

450 Nodes[Dst].IntFlow += EdgeFlow;

451 Nodes[Src].IntFlow -= EdgeFlow;

452 Edge->AugmentedFlow += EdgeFlow;

453 }

454 }

455 assert(Nodes[Target].IntFlow <= MaxFlowAmount);

456 Nodes[Target].IntFlow = 0;

457

458

459

460

461 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {

462 uint64_t Src = AugmentingOrder[Idx - 1];

463

464

465 for (auto &Edge : AugmentingEdges[Src]) {

467 if (Nodes[Dst].IntFlow == 0)

468 continue;

469 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);

470 Nodes[Dst].IntFlow -= EdgeFlow;

471 Nodes[Src].IntFlow += EdgeFlow;

472 Edge->AugmentedFlow -= EdgeFlow;

473 }

474 }

475

476

477 bool HasSaturatedEdges = false;

478 for (uint64_t Src : AugmentingOrder) {

479

480 assert(Src == Source || Nodes[Src].IntFlow == 0);

481 for (auto &Edge : AugmentingEdges[Src]) {

482 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);

483

484 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];

485 Edge->Flow += Edge->AugmentedFlow;

486 RevEdge.Flow -= Edge->AugmentedFlow;

487 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)

488 HasSaturatedEdges = true;

489 }

490 }

491

492

493 return HasSaturatedEdges;

494 }

495

496

497 void identifyShortestEdges(uint64_t PathCapacity) {

498 assert(PathCapacity > 0 && "found an incorrect augmenting DAG");

499

500

501

502

503 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

504

505

506 for (size_t Src = 0; Src < Nodes.size(); Src++) {

507

508 if (Nodes[Src].Distance > Nodes[Target].Distance)

509 continue;

510

511 for (auto &Edge : Edges[Src]) {

513 Edge.OnShortestPath =

514 Src != Target && Dst != Source &&

515 Nodes[Dst].Distance <= Nodes[Target].Distance &&

516 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&

517 Edge.Capacity > Edge.Flow &&

518 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;

519 }

520 }

521 }

522

523

524 static constexpr uint64_t MaxDfsCalls = 10;

525

526

527 struct Node {

528

529 int64_t Distance;

530

532

534

535 bool Taken;

536

537

538

539 double FracFlow;

540

542

544

546

548 };

549

550

551 struct Edge {

552

553 int64_t Cost;

554

555 int64_t Capacity;

556

557 int64_t Flow;

558

560

562

563

564

565 bool OnShortestPath;

566

568 };

569

570

571 std::vector Nodes;

572

573 std::vector<std::vector> Edges;

574

576

578

579 std::vector<std::vector<Edge *>> AugmentingEdges;

580

582};

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600class FlowAdjuster {

601public:

603 : Params(Params), Func(Func) {}

604

605

606 void run() {

608

609 joinIsolatedComponents();

610 }

611

613

614 rebalanceUnknownSubgraphs();

615 }

616 }

617

618private:

619 void joinIsolatedComponents() {

620

621 auto Visited = BitVector(NumBlocks(), false);

622 findReachable(Func.Entry, Visited);

623

624

625 for (uint64_t I = 0; I < NumBlocks(); I++) {

626 auto &Block = Func.Blocks[I];

627 if (Block.Flow > 0 && !Visited[I]) {

628

629 auto Path = findShortestPath(I);

630

631 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&

632 "incorrectly computed path adjusting control flow");

633 Func.Blocks[Func.Entry].Flow += 1;

634 for (auto &Jump : Path) {

635 Jump->Flow += 1;

636 Func.Blocks[Jump->Target].Flow += 1;

637

638 findReachable(Jump->Target, Visited);

639 }

640 }

641 }

642 }

643

644

645

647 if (Visited[Src])

648 return;

649 std::queue<uint64_t> Queue;

650 Queue.push(Src);

651 Visited[Src] = true;

652 while (!Queue.empty()) {

653 Src = Queue.front();

654 Queue.pop();

655 for (auto *Jump : Func.Blocks[Src].SuccJumps) {

657 if (Jump->Flow > 0 && !Visited[Dst]) {

658 Queue.push(Dst);

659 Visited[Dst] = true;

660 }

661 }

662 }

663 }

664

665

666

667 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {

668

669 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);

670

671 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

672

673

674 std::vector<FlowJump *> Result;

677 return Result;

678 }

679

680

681

682

684

685 if (Source == Target)

686 return std::vector<FlowJump *>();

687 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)

688 return std::vector<FlowJump *>();

689

690

691 auto Distance = std::vector<int64_t>(NumBlocks(), INF);

692 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);

693 Distance[Source] = 0;

694 std::set<std::pair<uint64_t, uint64_t>> Queue;

695 Queue.insert(std::make_pair(Distance[Source], Source));

696

697

698 while (!Queue.empty()) {

699 uint64_t Src = Queue.begin()->second;

700 Queue.erase(Queue.begin());

701

703 (Func.Blocks[Src].isExit() && Target == AnyExitBlock))

704 break;

705

706 for (auto *Jump : Func.Blocks[Src].SuccJumps) {

708 int64_t JumpDist = jumpDistance(Jump);

709 if (Distance[Dst] > Distance[Src] + JumpDist) {

710 Queue.erase(std::make_pair(Distance[Dst], Dst));

711

712 Distance[Dst] = Distance[Src] + JumpDist;

713 Parent[Dst] = Jump;

714

715 Queue.insert(std::make_pair(Distance[Dst], Dst));

716 }

717 }

718 }

719

720 if (Target == AnyExitBlock) {

721 for (uint64_t I = 0; I < NumBlocks(); I++) {

722 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {

723 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {

725 }

726 }

727 }

728 }

729 assert(Parent[Target] != nullptr && "a path does not exist");

730

731

732 std::vector<FlowJump *> Result;

734 while (Now != Source) {

735 assert(Now == Parent[Now]->Target && "incorrect parent jump");

736 Result.push_back(Parent[Now]);

737 Now = Parent[Now]->Source;

738 }

739

740 std::reverse(Result.begin(), Result.end());

741 return Result;

742 }

743

744

745

746

747

748

749

750

751

752

753 int64_t jumpDistance(FlowJump *Jump) const {

757 std::max(FlowAdjuster::MinBaseDistance,

758 std::min(Func.Blocks[Func.Entry].Flow,

759 Params.CostUnlikely / (2 * (NumBlocks() + 1))));

760 if (Jump->Flow > 0)

761 return BaseDistance + BaseDistance / Jump->Flow;

762 return 2 * BaseDistance * (NumBlocks() + 1);

763 };

764

765 uint64_t NumBlocks() const { return Func.Blocks.size(); }

766

767

768

769

770

771 void rebalanceUnknownSubgraphs() {

772

773 for (const FlowBlock &SrcBlock : Func.Blocks) {

774

775 if (!canRebalanceAtRoot(&SrcBlock))

776 continue;

777

778

779

780 std::vector<FlowBlock *> UnknownBlocks;

781 std::vector<FlowBlock *> KnownDstBlocks;

782 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

783

784

785

787 if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,

788 DstBlock))

789 continue;

790

791

792 if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))

793 continue;

794

795

796 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);

797 }

798 }

799

800

801 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {

802

803

805 return false;

806

807

808 bool HasUnknownSuccs = false;

809 for (auto *Jump : SrcBlock->SuccJumps) {

810 if (Func.Blocks[Jump->Target].HasUnknownWeight) {

811 HasUnknownSuccs = true;

812 break;

813 }

814 }

815 if (!HasUnknownSuccs)

816 return false;

817

818 return true;

819 }

820

821

822

823 void findUnknownSubgraph(const FlowBlock *SrcBlock,

824 std::vector<FlowBlock *> &KnownDstBlocks,

825 std::vector<FlowBlock *> &UnknownBlocks) {

826

827

828 auto Visited = BitVector(NumBlocks(), false);

829 std::queue<uint64_t> Queue;

830

831 Queue.push(SrcBlock->Index);

832 Visited[SrcBlock->Index] = true;

833 while (!Queue.empty()) {

834 auto &Block = Func.Blocks[Queue.front()];

835 Queue.pop();

836

837 for (auto *Jump : Block.SuccJumps) {

838

839 if (ignoreJump(SrcBlock, nullptr, Jump))

840 continue;

841

843

844 if (Visited[Dst])

845 continue;

846

847 Visited[Dst] = true;

848 if (!Func.Blocks[Dst].HasUnknownWeight) {

849 KnownDstBlocks.push_back(&Func.Blocks[Dst]);

850 } else {

851 Queue.push(Dst);

852 UnknownBlocks.push_back(&Func.Blocks[Dst]);

853 }

854 }

855 }

856 }

857

858

859

860 bool canRebalanceSubgraph(const FlowBlock *SrcBlock,

861 const std::vector<FlowBlock *> &KnownDstBlocks,

862 const std::vector<FlowBlock *> &UnknownBlocks,

864

865 if (UnknownBlocks.empty())

866 return false;

867

868

869 if (KnownDstBlocks.size() > 1)

870 return false;

871 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

872

873

874 for (auto *Block : UnknownBlocks) {

875 if (Block->SuccJumps.empty()) {

876

877 if (DstBlock != nullptr)

878 return false;

879 continue;

880 }

881 size_t NumIgnoredJumps = 0;

882 for (auto *Jump : Block->SuccJumps) {

883 if (ignoreJump(SrcBlock, DstBlock, Jump))

884 NumIgnoredJumps++;

885 }

886

887

888 if (NumIgnoredJumps == Block->SuccJumps.size())

889 return false;

890 }

891

892 return true;

893 }

894

895

896

897 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,

899

901 return true;

902

903 auto JumpSource = &Func.Blocks[Jump->Source];

904 auto JumpTarget = &Func.Blocks[Jump->Target];

905

906

907 if (DstBlock != nullptr && JumpTarget == DstBlock)

908 return false;

909

910

911 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)

912 return true;

913

914

915 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)

916 return true;

917

918 return false;

919 }

920

921

922

923 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,

924 std::vector<FlowBlock *> &UnknownBlocks) {

925

926 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);

928 for (auto *Jump : Block->SuccJumps) {

929 if (ignoreJump(SrcBlock, DstBlock, Jump))

930 continue;

931 LocalInDegree[Jump->Target]++;

932 }

933 };

934 fillInDegree(SrcBlock);

935 for (auto *Block : UnknownBlocks) {

936 fillInDegree(Block);

937 }

938

939 if (LocalInDegree[SrcBlock->Index] > 0)

940 return false;

941

942 std::vector<FlowBlock *> AcyclicOrder;

943 std::queue<uint64_t> Queue;

944 Queue.push(SrcBlock->Index);

945 while (!Queue.empty()) {

947 Queue.pop();

948

949 if (DstBlock != nullptr && Block == DstBlock)

950 break;

951

952

953 if (Block->HasUnknownWeight && Block != SrcBlock)

954 AcyclicOrder.push_back(Block);

955

956

957 for (auto *Jump : Block->SuccJumps) {

958 if (ignoreJump(SrcBlock, DstBlock, Jump))

959 continue;

961 LocalInDegree[Dst]--;

962 if (LocalInDegree[Dst] == 0) {

963 Queue.push(Dst);

964 }

965 }

966 }

967

968

969

970 if (UnknownBlocks.size() != AcyclicOrder.size())

971 return false;

972 UnknownBlocks = AcyclicOrder;

973 return true;

974 }

975

976

977

978 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,

980 const std::vector<FlowBlock *> &UnknownBlocks) {

981 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

982

983

985

986 for (auto *Jump : SrcBlock->SuccJumps) {

987 if (ignoreJump(SrcBlock, DstBlock, Jump))

988 continue;

989 BlockFlow += Jump->Flow;

990 }

991 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

992

993

994 for (auto *Block : UnknownBlocks) {

995 assert(Block->HasUnknownWeight && "incorrect unknown subgraph");

997

998 for (auto *Jump : Block->PredJumps) {

999 BlockFlow += Jump->Flow;

1000 }

1001 Block->Flow = BlockFlow;

1002 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);

1003 }

1004 }

1005

1006

1007

1008 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,

1010

1011 size_t BlockDegree = 0;

1012 for (auto *Jump : Block->SuccJumps) {

1013 if (ignoreJump(SrcBlock, DstBlock, Jump))

1014 continue;

1015 BlockDegree++;

1016 }

1017

1018 if (DstBlock == nullptr && BlockDegree == 0)

1019 return;

1020 assert(BlockDegree > 0 && "all outgoing jumps are ignored");

1021

1022

1023

1024 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;

1025 for (auto *Jump : Block->SuccJumps) {

1026 if (ignoreJump(SrcBlock, DstBlock, Jump))

1027 continue;

1028 uint64_t Flow = std::min(SuccFlow, BlockFlow);

1030 BlockFlow -= Flow;

1031 }

1032 assert(BlockFlow == 0 && "not all flow is propagated");

1033 }

1034

1035

1037

1038 static constexpr uint64_t MinBaseDistance = 10000;

1039

1040

1042

1044};

1045

1046std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,

1048std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,

1050

1051

1052

1053

1054

1055

1056void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,

1058 uint64_t NumBlocks = Func.Blocks.size();

1059 assert(NumBlocks > 1 && "Too few blocks in a function");

1060 uint64_t NumJumps = Func.Jumps.size();

1061 assert(NumJumps > 0 && "Too few jumps in a function");

1062

1063

1064

1065

1066

1071

1072 Network.initialize(2 * NumBlocks + 4, S1, T1);

1073

1074

1075 for (uint64_t B = 0; B < NumBlocks; B++) {

1076 auto &Block = Func.Blocks[B];

1077

1078

1079

1082

1083

1084 if (Block.isEntry()) {

1085 Network.addEdge(S, Bin, 0);

1086 } else if (Block.isExit()) {

1087 Network.addEdge(Bout, T, 0);

1088 }

1089

1090

1091 auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

1092

1093

1094 Network.addEdge(Bin, Bout, AuxCostInc);

1095 if (Block.Weight > 0) {

1096 Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);

1097 Network.addEdge(S1, Bout, Block.Weight, 0);

1098 Network.addEdge(Bin, T1, Block.Weight, 0);

1099 }

1100 }

1101

1102

1103 for (uint64_t J = 0; J < NumJumps; J++) {

1104 auto &Jump = Func.Jumps[J];

1105

1106

1109

1110

1111 auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

1112

1113

1114 Network.addEdge(Jin, Jout, AuxCostInc);

1115 if (Jump.Weight > 0) {

1116 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);

1117 Network.addEdge(S1, Jout, Jump.Weight, 0);

1118 Network.addEdge(Jin, T1, Jump.Weight, 0);

1119 }

1120 }

1121

1122

1123 Network.addEdge(T, S, 0);

1124}

1125

1126

1127std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,

1129

1130 if (Block.IsUnlikely)

1132

1133

1136

1137 if (Block.HasUnknownWeight) {

1139 CostDec = 0;

1140 } else {

1141

1142

1143 if (Block.Weight == 0)

1145

1146 if (Block.isEntry()) {

1149 }

1150 }

1151 return std::make_pair(CostInc, CostDec);

1152}

1153

1154

1155std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,

1157

1160

1161

1164

1166

1169 }

1171

1174 else

1176 CostDec = 0;

1177 }

1178 return std::make_pair(CostInc, CostDec);

1179}

1180

1181

1182void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,

1184 uint64_t NumBlocks = Func.Blocks.size();

1185 uint64_t NumJumps = Func.Jumps.size();

1186

1187

1188 for (uint64_t J = 0; J < NumJumps; J++) {

1189 auto &Jump = Func.Jumps[J];

1192

1193 int64_t Flow = 0;

1194 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);

1196 Flow = int64_t(Jump.Weight) + AuxFlow;

1197 else

1198 Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

1199

1201 assert(Flow >= 0 && "negative jump flow");

1202 }

1203

1204

1205 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);

1206 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);

1207 for (auto &Jump : Func.Jumps) {

1210 }

1211 for (uint64_t B = 0; B < NumBlocks; B++) {

1212 auto &Block = Func.Blocks[B];

1213 Block.Flow = std::max(OutFlow[B], InFlow[B]);

1214 }

1215}

1216

1217#ifndef NDEBUG

1218

1220

1221 assert(Func.Entry == 0 && Func.Blocks[0].isEntry());

1222 size_t NumExitBlocks = 0;

1223 for (size_t I = 1; I < Func.Blocks.size(); I++) {

1224 assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");

1225 if (Func.Blocks[I].isExit())

1226 NumExitBlocks++;

1227 }

1228 assert(NumExitBlocks > 0 && "cannot find exit blocks");

1229

1230

1231 for (auto &Block : Func.Blocks) {

1232 std::unordered_set<uint64_t> UniqueSuccs;

1233 for (auto &Jump : Block.SuccJumps) {

1234 auto It = UniqueSuccs.insert(Jump->Target);

1235 assert(It.second && "input CFG contains parallel edges");

1236 }

1237 }

1238

1239 for (auto &Block : Func.Blocks) {

1241 "a block cannot be an entry and an exit");

1242 }

1243

1244 for (auto &Block : Func.Blocks) {

1246 "non-zero weight of a block w/o weight except for an entry");

1247 }

1248

1249 for (auto &Jump : Func.Jumps) {

1251 "non-zero weight of a jump w/o weight");

1252 }

1253}

1254

1255

1256void verifyOutput(const FlowFunction &Func) {

1257 const uint64_t NumBlocks = Func.Blocks.size();

1258 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);

1259 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);

1260 for (const auto &Jump : Func.Jumps) {

1263 }

1264

1267 for (uint64_t I = 0; I < NumBlocks; I++) {

1268 auto &Block = Func.Blocks[I];

1269 if (Block.isEntry()) {

1270 TotalInFlow += Block.Flow;

1271 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");

1272 } else if (Block.isExit()) {

1273 TotalOutFlow += Block.Flow;

1274 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");

1275 } else {

1276 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");

1277 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");

1278 }

1279 }

1280 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

1281

1282

1283

1284

1285 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);

1286 for (const auto &Jump : Func.Jumps) {

1287 if (Jump.Flow > 0) {

1288 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);

1289 }

1290 }

1291

1292

1293 std::queue<uint64_t> Queue;

1294 auto Visited = BitVector(NumBlocks, false);

1295 Queue.push(Func.Entry);

1296 Visited[Func.Entry] = true;

1297 while (!Queue.empty()) {

1298 uint64_t Src = Queue.front();

1299 Queue.pop();

1300 for (uint64_t Dst : PositiveFlowEdges[Src]) {

1301 if (!Visited[Dst]) {

1302 Queue.push(Dst);

1303 Visited[Dst] = true;

1304 }

1305 }

1306 }

1307

1308

1309

1310 for (uint64_t I = 0; I < NumBlocks; I++) {

1311 auto &Block = Func.Blocks[I];

1312 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");

1313 }

1314}

1315#endif

1316

1317}

1318

1319

1320

1322

1323 bool HasSamples = false;

1325 if (Block.Weight > 0)

1326 HasSamples = true;

1328 }

1329 for (FlowJump &Jump : Func.Jumps) {

1330 if (Jump.Weight > 0)

1331 HasSamples = true;

1333 }

1334

1335

1336 if (Func.Blocks.size() <= 1 || !HasSamples)

1337 return;

1338

1339#ifndef NDEBUG

1340

1341 verifyInput(Func);

1342#endif

1343

1344

1345 auto InferenceNetwork = MinCostMaxFlow(Params);

1346 initializeNetwork(Params, InferenceNetwork, Func);

1347 InferenceNetwork.run();

1348

1349

1350 extractWeights(Params, InferenceNetwork, Func);

1351

1352

1353 auto Adjuster = FlowAdjuster(Params, Func);

1354 Adjuster.run();

1355

1356#ifndef NDEBUG

1357

1358 verifyOutput(Func);

1359#endif

1360}

1361

1362

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file implements the BitVector class.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)

This file provides the interface for the profile inference algorithm, profi.

static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)

Initialize the set of available library functions based on the specified target triple.

Target - Wrapper for Target specific information.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)

Apply the profile inference algorithm for a given function and provided profi options.

Definition SampleProfileInference.cpp:1321

A wrapper of a binary basic block.

std::vector< FlowJump * > SuccJumps

A wrapper of binary function with basic blocks and jumps.

A wrapper of a jump between two basic blocks.

Various thresholds and options controlling the behavior of the profile inference algorithm.

unsigned CostJumpUnknownFTInc

The cost of increasing an unknown fall-through jump's count by one.

unsigned CostBlockInc

The cost of increasing a block's count by one.

unsigned CostJumpFTInc

The cost of increasing a fall-through jump's count by one.

bool RebalanceUnknown

Evenly re-distribute flow among unknown subgraphs.

const int64_t CostUnlikely

The cost of taking an unlikely block/jump.

unsigned CostJumpDec

The cost of decreasing a jump's count by one.

bool JoinIslands

Join isolated components having positive flow.

unsigned CostBlockZeroInc

The cost of increasing a count of zero-weight block by one.

unsigned CostBlockEntryDec

The cost of decreasing the entry block's count by one.

unsigned CostJumpInc

The cost of increasing a jump's count by one.

unsigned CostJumpUnknownInc

The cost of increasing an unknown jump's count by one.

unsigned CostBlockUnknownInc

The cost of increasing an unknown block's count by one.

unsigned CostJumpFTDec

The cost of decreasing a fall-through jump's count by one.

unsigned CostBlockDec

The cost of decreasing a block's count by one.

unsigned CostBlockEntryInc

The cost of increasing the entry block's count by one.

bool EvenFlowDistribution

Evenly distribute flow when there are multiple equally likely options.