LLVM: lib/Transforms/Scalar/LoopUnrollPass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

63#include

64#include

65#include

66#include

67#include

68#include

69#include

70#include

71

72using namespace llvm;

73

74#define DEBUG_TYPE "loop-unroll"

75

78 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"

79 " the current top-most loop. This is sometimes preferred to reduce"

80 " compile time."));

81

84 cl::desc("The cost threshold for loop unrolling"));

85

89 cl::desc("The cost threshold for loop unrolling when optimizing for "

90 "size"));

91

93 "unroll-partial-threshold", cl::Hidden,

94 cl::desc("The cost threshold for partial loop unrolling"));

95

98 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "

99 "to the threshold when aggressively unrolling a loop due to the "

100 "dynamic cost savings. If completely unrolling a loop will reduce "

101 "the total runtime from X to Y, we boost the loop unroll "

102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "

103 "X/Y). This limit avoids excessive code bloat."));

104

107 cl::desc("Don't allow loop unrolling to simulate more than this number of "

108 "iterations when checking full unroll profitability"));

109

112 cl::desc("Use this unroll count for all loops including those with "

113 "unroll_count pragma values, for testing purposes"));

114

117 cl::desc("Set the max unroll count for partial and runtime unrolling, for"

118 "testing purposes"));

119

123 "Set the max unroll count for full unrolling, for testing purposes"));

124

127 cl::desc("Allows loops to be partially unrolled until "

128 "-unroll-threshold loop size is reached."));

129

131 "unroll-allow-remainder", cl::Hidden,

132 cl::desc("Allow generation of a loop remainder (extra iterations) "

133 "when unrolling a loop."));

134

137 cl::desc("Unroll loops with run-time trip counts"));

138

142 "The max of trip count upper bound that is considered in unrolling"));

143

146 cl::desc("Unrolled size limit for loops with an unroll(full) or "

147 "unroll_count pragma."));

148

151 cl::desc("If the runtime tripcount for the loop is lower than the "

152 "threshold, the loop is considered as flat and will be less "

153 "aggressively unrolled."));

154

157 cl::desc("Allow the loop remainder to be unrolled."));

158

159

160

161

163 "unroll-revisit-child-loops", cl::Hidden,

164 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "

165 "This shouldn't typically be needed as child loops (or their "

166 "clones) were already visited."));

167

170 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "

171 "optimizations"));

175 cl::desc("Default threshold (max size of unrolled "

176 "loop), used in all but O3 optimizations"));

177

179 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,

180 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));

181

182

183

184

185static const unsigned NoThreshold = std::numeric_limits::max();

186

187

188

193 std::optional UserThreshold, std::optional UserCount,

194 std::optional UserAllowPartial, std::optional UserRuntime,

195 std::optional UserUpperBound,

196 std::optional UserFullUnrollMaxCount) {

198

199

208 UP.MaxCount = std::numeric_limits::max();

217 UP.Force = false;

223

224

226

227

228 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||

229

232 PGSOQueryType::IRPass));

233 if (OptForSize) {

237 }

238

239

264

265

266 if (UserThreshold) {

269 }

270 if (UserCount)

271 UP.Count = *UserCount;

272 if (UserAllowPartial)

273 UP.Partial = *UserAllowPartial;

274 if (UserRuntime)

275 UP.Runtime = *UserRuntime;

276 if (UserUpperBound)

278 if (UserFullUnrollMaxCount)

280

281 return UP;

282}

283

284namespace {

285

286

287

288

289

290

291

292struct UnrolledInstState {

294 int Iteration : 30;

295 unsigned IsFree : 1;

296 unsigned IsCounted : 1;

297};

298

299

300struct UnrolledInstStateKeyInfo {

303

304 static inline UnrolledInstState getEmptyKey() {

305 return {PtrInfo::getEmptyKey(), 0, 0, 0};

306 }

307

308 static inline UnrolledInstState getTombstoneKey() {

309 return {PtrInfo::getTombstoneKey(), 0, 0, 0};

310 }

311

312 static inline unsigned getHashValue(const UnrolledInstState &S) {

313 return PairInfo::getHashValue({S.I, S.Iteration});

314 }

315

316 static inline bool isEqual(const UnrolledInstState &LHS,

317 const UnrolledInstState &RHS) {

318 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});

319 }

320};

321

322struct EstimatedUnrollCost {

323

324 unsigned UnrolledCost;

325

326

327

328 unsigned RolledDynamicCost;

329};

330

331struct PragmaInfo {

332 PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)

333 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),

334 PragmaEnableUnroll(PEU) {}

335 const bool UserUnrollCount;

336 const bool PragmaFullUnroll;

337 const unsigned PragmaCount;

338 const bool PragmaEnableUnroll;

339};

340

341}

342

343

344

345

346

347

348

349

350

351

352

353

354

355

360 unsigned MaxIterationsCountToAnalyze) {

361

362

363

364 assert(MaxIterationsCountToAnalyze <

365 (unsigned)(std::numeric_limits::max() / 2) &&

366 "The unroll iterations max is too large!");

367

368

369

370 if (!L->isInnermost())

371 return std::nullopt;

372

373

374 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)

375 return std::nullopt;

376

381

382

383

385

386

387

388

389

390

392

393

394

395

396

398

399

400

402

403

405

406

407 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {

408 assert(Iteration >= 0 && "Cannot have a negative iteration!");

409 assert(CostWorklist.empty() && "Must start with an empty cost list");

410 assert(PHIUsedList.empty() && "Must start with an empty phi used list");

416 for (;; --Iteration) {

417 do {

419

420

421

422 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});

423 if (CostIter == InstCostMap.end())

424

425

426

427 continue;

428 auto &Cost = *CostIter;

429 if (Cost.IsCounted)

430

431 continue;

432

433

434 Cost.IsCounted = true;

435

436

437 if (auto *PhiI = dyn_cast(I))

438 if (PhiI->getParent() == L->getHeader()) {

439 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "

440 "inherently simplify during unrolling.");

441 if (Iteration == 0)

442 continue;

443

444

445

446

447 if (auto *OpI = dyn_cast(

448 PhiI->getIncomingValueForBlock(L->getLoopLatch())))

449 if (L->contains(OpI))

451 continue;

452 }

453

454

455 if (Cost.IsFree) {

456

460 if (auto Res = SimplifiedValues.lookup(Op))

461 return Res;

462 return Op;

463 });

465 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "

466 << Iteration << "): ");

468 }

469

470

471

472

473 for (Value *Op : I->operands()) {

474

475

476 auto *OpI = dyn_cast(Op);

477 if (!OpI || !L->contains(OpI))

478 continue;

479

480

482 }

483 } while (!CostWorklist.empty());

484

485 if (PHIUsedList.empty())

486

487 break;

488

489 assert(Iteration > 0 &&

490 "Cannot track PHI-used values past the first iteration!");

491 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());

492 PHIUsedList.clear();

493 }

494 };

495

496

497

498 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");

499 assert(L->isLCSSAForm(DT) &&

500 "Must have loops in LCSSA form to track live-out values.");

501

502 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

503

505 L->getHeader()->getParent()->hasMinSize() ?

507

508

509

510

511 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {

512 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

513

514

515

517 auto *PHI = dyn_cast(&I);

518 if (PHI)

519 break;

520

521

522

524 PHI->getNumIncomingValues() == 2 &&

525 "Must have an incoming value only for the preheader and the latch.");

526

527 Value *V = PHI->getIncomingValueForBlock(

528 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());

529 if (Iteration != 0 && SimplifiedValues.count(V))

530 V = SimplifiedValues.lookup(V);

532 }

533

534

535 SimplifiedValues.clear();

536 while (!SimplifiedInputValues.empty())

538

540

541 BBWorklist.clear();

542 BBWorklist.insert(L->getHeader());

543

544 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {

546

547

548

549

551

552

553 if (isa(I) || EphValues.count(&I))

554 continue;

555

556

557

559

560

561

562

563 bool IsFree = Analyzer.visit(I);

564 bool Inserted = InstCostMap.insert({&I, (int)Iteration,

565 (unsigned)IsFree,

566 false}).second;

567 (void)Inserted;

568 assert(Inserted && "Cannot have a state for an unvisited instruction!");

569

570 if (IsFree)

571 continue;

572

573

574

575 if (auto *CI = dyn_cast(&I)) {

576 const Function *Callee = CI->getCalledFunction();

578 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");

579 return std::nullopt;

580 }

581 }

582

583

584

585 if (I.mayHaveSideEffects())

586 AddCostRecursively(I, Iteration);

587

588

589 if (UnrolledCost > MaxUnrolledLoopSize) {

590 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"

591 << " UnrolledCost: " << UnrolledCost

592 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize

593 << "\n");

594 return std::nullopt;

595 }

596 }

597

599

600 auto getSimplifiedConstant = [&](Value *V) -> Constant * {

601 if (SimplifiedValues.count(V))

602 V = SimplifiedValues.lookup(V);

603 return dyn_cast(V);

604 };

605

606

607

609 if (BranchInst *BI = dyn_cast(TI)) {

610 if (BI->isConditional()) {

611 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {

612

613 if (isa(SimpleCond))

614 KnownSucc = BI->getSuccessor(0);

616 dyn_cast(SimpleCond))

617 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);

618 }

619 }

620 } else if (SwitchInst *SI = dyn_cast(TI)) {

621 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {

622

623 if (isa(SimpleCond))

624 KnownSucc = SI->getSuccessor(0);

626 dyn_cast(SimpleCond))

627 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();

628 }

629 }

630 if (KnownSucc) {

631 if (L->contains(KnownSucc))

632 BBWorklist.insert(KnownSucc);

633 else

634 ExitWorklist.insert({BB, KnownSucc});

635 continue;

636 }

637

638

640 if (L->contains(Succ))

641 BBWorklist.insert(Succ);

642 else

643 ExitWorklist.insert({BB, Succ});

644 AddCostRecursively(*TI, Iteration);

645 }

646

647

648

649 if (UnrolledCost == RolledDynamicCost) {

650 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"

651 << " UnrolledCost: " << UnrolledCost << "\n");

652 return std::nullopt;

653 }

654 }

655

656 while (!ExitWorklist.empty()) {

658 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

659

661 auto *PN = dyn_cast(&I);

662 if (!PN)

663 break;

664

665 Value *Op = PN->getIncomingValueForBlock(ExitingBB);

666 if (auto *OpI = dyn_cast(Op))

667 if (L->contains(OpI))

668 AddCostRecursively(*OpI, TripCount - 1);

669 }

670 }

671

673 "All instructions must have a valid cost, whether the "

674 "loop is rolled or unrolled.");

675

677 << "UnrolledCost: " << UnrolledCost << ", "

678 << "RolledDynamicCost: " << RolledDynamicCost << "\n");

681}

682

688 Metrics.analyzeBasicBlock(BB, TTI, EphValues, false,

689 L);

691 NotDuplicatable = Metrics.notDuplicatable;

693 LoopSize = Metrics.NumInsts;

697

698

699

700

701

702

703

704 if (LoopSize.isValid() && LoopSize < BEInsns + 1)

705

706 LoopSize = BEInsns + 1;

707}

708

712 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");

713 return false;

714 default:

715 break;

716 }

717 if (!LoopSize.isValid()) {

718 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");

719 return false;

720 }

721 if (NotDuplicatable) {

722 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");

723 return false;

724 }

725 return true;

726}

727

730 unsigned CountOverwrite) const {

731 unsigned LS = *LoopSize.getValue();

732 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");

733 if (CountOverwrite)

735 else

737}

738

739

740

741

743 if (MDNode *LoopID = L->getLoopID())

745 return nullptr;

746}

747

748

751}

752

753

754

757}

758

759

762}

763

764

765

768 if (MD) {

770 "Unroll count hint metadata should have two operands.");

771 unsigned Count =

772 mdconst::extract(MD->getOperand(1))->getZExtValue();

773 assert(Count >= 1 && "Unroll count must be positive.");

774 return Count;

775 }

776 return 0;

777}

778

779

780

781

782

783

785 unsigned MaxPercentThresholdBoost) {

786 if (Cost.RolledDynamicCost >= std::numeric_limits::max() / 100)

787 return 100;

788 else if (Cost.UnrolledCost != 0)

789

790 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,

791 MaxPercentThresholdBoost);

792 else

793 return MaxPercentThresholdBoost;

794}

795

796static std::optional

798 const unsigned TripMultiple, const unsigned TripCount,

801

802

803

804

805 if (PInfo.UserUnrollCount) {

809 }

810

811

812 if (PInfo.PragmaCount > 0) {

813 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))

814 return PInfo.PragmaCount;

815 }

816

817 if (PInfo.PragmaFullUnroll && TripCount != 0) {

818

819

820

822 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");

823 return std::nullopt;

824 }

825

826 return TripCount;

827 }

828

829 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&

831 return MaxTripCount;

832

833

834 return std::nullopt;

835}

836

842 assert(FullUnrollTripCount && "should be non-zero!");

843

845 return std::nullopt;

846

847

848

850 return FullUnrollTripCount;

851

852

853

854

856 L, FullUnrollTripCount, DT, SE, EphValues, TTI,

859 unsigned Boost =

861 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)

862 return FullUnrollTripCount;

863 }

864 return std::nullopt;

865}

866

867static std::optional

871

872 if (!TripCount)

873 return std::nullopt;

874

876 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "

877 << "-unroll-allow-partial not given\n");

878 return 0;

879 }

882 count = TripCount;

884

890 while (count != 0 && TripCount % count != 0)

893

894

895

896

898 while (count != 0 &&

901 }

904 }

905 } else {

906 count = TripCount;

907 }

910

911 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");

912

914}

915

916

917

918

919

920

921

922

931

933

934 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;

938

939 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||

940 PragmaEnableUnroll || UserUnrollCount;

941

942 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,

943 PragmaEnableUnroll);

944

945

947 if (UnrollCount.getNumOccurrences() > 0) {

949 "explicit unroll count", false);

950 }

953 return true;

954 }

955

956

957

958 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,

959 MaxTripCount, UCE, UP)) {

960 UP.Count = *UnrollFactor;

961

962 if (UserUnrollCount || (PragmaCount > 0)) {

965 }

966 UP.Runtime |= (PragmaCount > 0);

967 return ExplicitUnroll;

968 } else {

969 if (ExplicitUnroll && TripCount != 0) {

970

971

972

976 }

977 }

978

979

980

982 if (TripCount) {

983 UP.Count = TripCount;

985 TripCount, UCE, UP)) {

986 UP.Count = *UnrollFactor;

987 UseUpperBound = false;

988 return ExplicitUnroll;

989 }

990 }

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&

1006 UP.Count = MaxTripCount;

1008 MaxTripCount, UCE, UP)) {

1009 UP.Count = *UnrollFactor;

1010 UseUpperBound = true;

1011 return ExplicitUnroll;

1012 }

1013 }

1014

1015

1020 return ExplicitUnroll;

1021 }

1022

1023

1024

1025 if (TripCount)

1026 UP.Partial |= ExplicitUnroll;

1027

1028

1029

1030 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {

1031 UP.Count = *UnrollFactor;

1032

1033 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&

1034 UP.Count != TripCount)

1035 ORE->emit([&]() {

1037 "FullUnrollAsDirectedTooLarge",

1038 L->getStartLoc(), L->getHeader())

1039 << "Unable to fully unroll loop as directed by unroll pragma "

1040 "because "

1041 "unrolled size is too large.";

1042 });

1043

1045 if (UP.Count == 0) {

1046 if (PragmaEnableUnroll)

1047 ORE->emit([&]() {

1049 "UnrollAsDirectedTooLarge",

1050 L->getStartLoc(), L->getHeader())

1051 << "Unable to unroll loop as directed by unroll(enable) "

1052 "pragma "

1053 "because unrolled size is too large.";

1054 });

1055 }

1056 }

1057 return ExplicitUnroll;

1058 }

1059 assert(TripCount == 0 &&

1060 "All cases when TripCount is constant should be covered here.");

1061 if (PragmaFullUnroll)

1062 ORE->emit([&]() {

1064 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",

1065 L->getStartLoc(), L->getHeader())

1066 << "Unable to fully unroll loop as directed by unroll(full) "

1067 "pragma "

1068 "because loop has a runtime trip count.";

1069 });

1070

1071

1072

1075 return false;

1076 }

1077

1078

1081 return false;

1082 }

1083

1084

1085 if (L->getHeader()->getParent()->hasProfileData()) {

1088 return false;

1089 else

1091 }

1092 }

1093 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;

1096 dbgs() << " will not try to unroll loop with runtime trip count "

1097 << "-unroll-runtime not given\n");

1099 return false;

1100 }

1101 if (UP.Count == 0)

1103

1104

1105

1106 while (UP.Count != 0 &&

1109

1110#ifndef NDEBUG

1111 unsigned OrigCount = UP.Count;

1112#endif

1113

1115 while (UP.Count != 0 && TripMultiple % UP.Count != 0)

1118 dbgs() << "Remainder loop is restricted (that could architecture "

1119 "specific or because the loop contains a convergent "

1120 "instruction), so unroll count must divide the trip "

1121 "multiple, "

1122 << TripMultiple << ". Reducing unroll count from " << OrigCount

1123 << " to " << UP.Count << ".\n");

1124

1125 using namespace ore;

1126

1128 ORE->emit([&]() {

1130 "DifferentUnrollCountFromDirected",

1131 L->getStartLoc(), L->getHeader())

1132 << "Unable to unroll loop the number of times directed by "

1133 "unroll_count pragma because remainder loop is restricted "

1134 "(that could architecture specific or because the loop "

1135 "contains a convergent instruction) and so must have an "

1136 "unroll "

1137 "count that divides the loop trip multiple of "

1138 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "

1139 << NV("UnrollCount", UP.Count) << " time(s).";

1140 });

1141 }

1142

1145

1146 if (MaxTripCount && UP.Count > MaxTripCount)

1147 UP.Count = MaxTripCount;

1148

1150 << "\n");

1151 if (UP.Count < 2)

1153 return ExplicitUnroll;

1154}

1155

1161 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,

1162 std::optional ProvidedCount,

1163 std::optional ProvidedThreshold,

1164 std::optional ProvidedAllowPartial,

1165 std::optional ProvidedRuntime,

1166 std::optional ProvidedUpperBound,

1167 std::optional ProvidedAllowPeeling,

1168 std::optional ProvidedAllowProfileBasedPeeling,

1169 std::optional ProvidedFullUnrollMaxCount,

1171

1173 << L->getHeader()->getParent()->getName() << "] Loop %"

1174 << L->getHeader()->getName() << "\n");

1178

1179

1180

1181

1182

1183 Loop *ParentL = L->getParentLoop();

1184 if (ParentL != nullptr &&

1187 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"

1188 << " llvm.loop.unroll_and_jam.\n");

1190 }

1191

1192

1193

1194

1199 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");

1201 }

1202

1203 if (!L->isLoopSimplifyForm()) {

1205 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");

1207 }

1208

1209

1210

1211 if (OnlyWhenForced && !(TM & TM_Enable))

1213

1214 bool OptForSize = L->getHeader()->getParent()->hasOptSize();

1216 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,

1217 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,

1218 ProvidedFullUnrollMaxCount);

1220 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);

1221

1222

1223

1225 !OptForSize)

1227

1230

1233 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");

1235 }

1236

1238 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");

1239

1240

1241

1242 if (OptForSize)

1244

1246 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");

1248 }

1249

1250

1251

1252

1253

1254

1255 unsigned TripCount = 0;

1256 unsigned TripMultiple = 1;

1258 L->getExitingBlocks(ExitingBlocks);

1259 for (BasicBlock *ExitingBlock : ExitingBlocks)

1261 if (!TripCount || TC < TripCount)

1262 TripCount = TripMultiple = TC;

1263

1264 if (!TripCount) {

1265

1266

1267

1268 BasicBlock *ExitingBlock = L->getLoopLatch();

1269 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))

1270 ExitingBlock = L->getExitingBlock();

1271 if (ExitingBlock)

1273 }

1274

1275

1276

1277

1278

1279

1280

1281

1283

1284

1285

1286 unsigned MaxTripCount = 0;

1287 bool MaxOrZero = false;

1288 if (!TripCount) {

1291 }

1292

1293

1294

1295 bool UseUpperBound = false;

1297 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,

1298 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);

1301

1303

1305 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");

1306 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()

1307 << " with iteration count " << PP.PeelCount << "!\n");

1308 ORE.emit([&]() {

1310 L->getHeader())

1312 << " iterations";

1313 });

1314

1316 if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {

1318

1319

1321 L->setLoopAlreadyUnrolled();

1323 }

1325 }

1326

1327

1328 if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {

1330 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");

1332 }

1333

1334

1335

1336

1337

1338

1339 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;

1340

1341

1342 MDNode *OrigLoopID = L->getLoopID();

1343

1344

1345 Loop *RemainderLoop = nullptr;

1356 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);

1359

1360 if (RemainderLoop) {

1361 std::optional<MDNode *> RemainderLoopID =

1364 if (RemainderLoopID)

1365 RemainderLoop->setLoopID(*RemainderLoopID);

1366 }

1367

1369 std::optional<MDNode *> NewLoopID =

1372 if (NewLoopID) {

1373 L->setLoopID(*NewLoopID);

1374

1375

1376

1377 return UnrollResult;

1378 }

1379 }

1380

1381

1382

1384 L->setLoopAlreadyUnrolled();

1385

1386 return UnrollResult;

1387}

1388

1389namespace {

1390

1391class LoopUnroll : public LoopPass {

1392public:

1393 static char ID;

1394

1395 int OptLevel;

1396

1397

1398

1399

1400 bool OnlyWhenForced;

1401

1402

1403

1404

1405 bool ForgetAllSCEV;

1406

1407 std::optional ProvidedCount;

1408 std::optional ProvidedThreshold;

1409 std::optional ProvidedAllowPartial;

1410 std::optional ProvidedRuntime;

1411 std::optional ProvidedUpperBound;

1412 std::optional ProvidedAllowPeeling;

1413 std::optional ProvidedAllowProfileBasedPeeling;

1414 std::optional ProvidedFullUnrollMaxCount;

1415

1416 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,

1417 bool ForgetAllSCEV = false,

1418 std::optional Threshold = std::nullopt,

1419 std::optional Count = std::nullopt,

1420 std::optional AllowPartial = std::nullopt,

1421 std::optional Runtime = std::nullopt,

1422 std::optional UpperBound = std::nullopt,

1423 std::optional AllowPeeling = std::nullopt,

1424 std::optional AllowProfileBasedPeeling = std::nullopt,

1425 std::optional ProvidedFullUnrollMaxCount = std::nullopt)

1426 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),

1427 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),

1428 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),

1429 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),

1430 ProvidedAllowPeeling(AllowPeeling),

1431 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),

1432 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {

1434 }

1435

1437 if (skipLoop(L))

1438 return false;

1439

1440 Function &F = *L->getHeader()->getParent();

1441

1442 auto &DT = getAnalysis().getDomTree();

1443 LoopInfo *LI = &getAnalysis().getLoopInfo();

1444 ScalarEvolution &SE = getAnalysis().getSE();

1446 getAnalysis().getTTI(F);

1447 auto &AC = getAnalysis().getAssumptionCache(F);

1448

1449

1450

1452 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

1453

1455 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,

1456 false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,

1457 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,

1458 ProvidedUpperBound, ProvidedAllowPeeling,

1459 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);

1460

1461 if (Result == LoopUnrollResult::FullyUnrolled)

1463

1464 return Result != LoopUnrollResult::Unmodified;

1465 }

1466

1467

1468

1469 void getAnalysisUsage(AnalysisUsage &AU) const override {

1472

1473

1475 }

1476};

1477

1478}

1479

1480char LoopUnroll::ID = 0;

1481

1487

1489 bool ForgetAllSCEV, int Threshold, int Count,

1490 int AllowPartial, int Runtime, int UpperBound,

1491 int AllowPeeling) {

1492

1493

1494

1495 return new LoopUnroll(

1496 OptLevel, OnlyWhenForced, ForgetAllSCEV,

1497 Threshold == -1 ? std::nullopt : std::optional(Threshold),

1498 Count == -1 ? std::nullopt : std::optional(Count),

1499 AllowPartial == -1 ? std::nullopt : std::optional(AllowPartial),

1500 Runtime == -1 ? std::nullopt : std::optional(Runtime),

1501 UpperBound == -1 ? std::nullopt : std::optional(UpperBound),

1502 AllowPeeling == -1 ? std::nullopt : std::optional(AllowPeeling));

1503}

1504

1508

1509

1510

1512

1513

1514

1515 Loop *ParentL = L.getParentLoop();

1517 if (ParentL)

1519 else

1521

1522 std::string LoopName = std::string(L.getName());

1523

1524 bool Changed =

1526 nullptr, nullptr,

1527 true, OptLevel, true,

1528 OnlyWhenForced, ForgetSCEV, std::nullopt,

1529 std::nullopt, false,

1530 false, false,

1531 true,

1532 false,

1533 std::nullopt) !=

1535 if (!Changed)

1537

1538

1539#ifndef NDEBUG

1540 if (ParentL)

1542#endif

1543

1544

1545

1546

1547

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560 bool IsCurrentLoopValid = false;

1562 if (ParentL)

1564 else

1567 if (SibLoop == &L) {

1568 IsCurrentLoopValid = true;

1569 return true;

1570 }

1571

1572

1573 return OldLoops.contains(SibLoop);

1574 });

1576

1577 if (!IsCurrentLoopValid) {

1579 } else {

1580

1582

1585 }

1586 }

1587

1589}

1590

1594

1595

1604

1607 LAM = &LAMProxy->getManager();

1608

1612 auto *BFI = (PSI && PSI->hasProfileSummary()) ?

1614

1615 bool Changed = false;

1616

1617

1618

1619

1620

1621

1622 for (const auto &L : LI) {

1623 Changed |=

1624 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false );

1626 }

1627

1628

1629

1632

1633 while (!Worklist.empty()) {

1634

1635

1636

1637

1639#ifndef NDEBUG

1640 Loop *ParentL = L.getParentLoop();

1641#endif

1642

1643

1644

1645

1646 std::optional LocalAllowPeeling = UnrollOpts.AllowPeeling;

1647 if (PSI && PSI->hasHugeWorkingSetSize())

1648 LocalAllowPeeling = false;

1649 std::string LoopName = std::string(L.getName());

1650

1651

1653 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,

1654 true, UnrollOpts.OptLevel, false,

1656 std::nullopt,

1657 std::nullopt, UnrollOpts.AllowPartial,

1660 &AA);

1662

1663

1664#ifndef NDEBUG

1667#endif

1668

1669

1672 }

1673

1674 if (!Changed)

1676

1678}

1679

1683 OS, MapClassName2PassName);

1684 OS << '<';

1685 if (UnrollOpts.AllowPartial != std::nullopt)

1686 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";

1687 if (UnrollOpts.AllowPeeling != std::nullopt)

1688 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";

1689 if (UnrollOpts.AllowRuntime != std::nullopt)

1690 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";

1692 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";

1695 << "profile-peeling;";

1699 OS << '>';

1700}

static bool isEqual(const Function &Caller, const Function &Callee)

This file contains the declarations for the subclasses of Constant, which represent the different fla...

static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines DenseMapInfo traits for DenseMap.

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

This file provides various utilities for inspecting and working with the control flow graph in LLVM I...

This header defines various interfaces for pass management in LLVM.

This header provides classes for managing per-loop analyses.

This header provides classes for managing a pipeline of passes over loops in LLVM IR.

static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)

static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))

static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))

static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))

static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))

static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))

static bool hasUnrollFullPragma(const Loop *L)

static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))

static unsigned unrollCountPragmaValue(const Loop *L)

static bool hasUnrollEnablePragma(const Loop *L)

static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))

static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))

static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)

Figure out if the loop is worth full unrolling.

static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))

static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))

static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))

static const unsigned NoThreshold

A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...

static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))

static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))

static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))

static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)

static bool hasRuntimeUnrollDisablePragma(const Loop *L)

static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)

static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))

static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))

static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))

static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))

static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))

mir Rename Register Operands

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This pass exposes codegen information to IR-level passes.

A manager for alias analyses.

A container for analyses that lazily runs them and caches their results.

void clear(IRUnitT &IR, llvm::StringRef Name)

Clear any cached analysis results for a single unit of IR.

PassT::Result * getCachedResult(IRUnitT &IR) const

Get the cached result of an analysis pass for a given IR unit.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

A function analysis which provides an AssumptionCache.

An immutable pass that tracks lazily created AssumptionCache objects.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Analysis pass which computes BlockFrequencyInfo.

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

Conditional or Unconditional Branch instruction.

This is the shared class of boolean and integer constants.

This is an important base class in LLVM.

This class represents an Operation in the Expression.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Implements a dense probed hash-table based set.

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

bool hasMinSize() const

Optimize this function for minimum size (-Oz).

An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...

std::optional< CostType > getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

const Function * getFunction() const

Return the function this instruction belongs to.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

void addChildLoops(ArrayRef< Loop * > NewChildLoops)

Loop passes should use this method to indicate they have added new child loops of the current loop.

void markLoopAsDeleted(Loop &L, llvm::StringRef Name)

Loop passes should use this method to indicate they have deleted a loop from the nest.

void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)

Loop passes should use this method to indicate they have added new sibling loops to the current loop.

void markLoopAsDeleted(Loop &L)

Analysis pass that exposes the LoopInfo for a function.

void verifyLoop() const

Verify loop structure.

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)

Represents a single loop in the control flow graph.

void setLoopID(MDNode *LoopID) const

Set the llvm.loop loop id metadata for this loop.

const MDOperand & getOperand(unsigned I) const

unsigned getNumOperands() const

Return number of MDNode operands.

An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

bool empty() const

Determine if the PriorityWorklist is empty or not.

An analysis pass based on the new PM to deliver ProfileSummaryInfo.

Analysis providing profile information.

Analysis pass that exposes the ScalarEvolution for a function.

The main scalar evolution driver.

unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)

Returns the largest constant divisor of the trip count as a normal unsigned value,...

unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

bool isBackedgeTakenCountMaxOrZero(const Loop *L)

Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...

unsigned getSmallConstantTripCount(const Loop *L)

Returns the exact trip count of the loop if we can compute it, and the result is a small constant.

size_type size() const

Determine the number of elements in the SetVector.

void clear()

Completely clear the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

value_type pop_back_val()

A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const

Get target-customized preferences for the generic loop unrolling transformation.

TargetCostKind

The kind of cost model.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

bool isLoweredToCall(const Function *F) const

Test whether calls to a function lower to actual program function calls.

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

Produce an estimate of the unrolled cost of the specified loop.

ConvergenceKind Convergence

bool ConvergenceAllowsRuntime

uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const

Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.

bool canUnroll() const

Whether it is legal to unroll this loop.

unsigned NumInlineCandidates

UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)

uint64_t getRolledLoopSize() const

LLVM Value Representation.

int getNumOccurrences() const

std::pair< iterator, bool > insert(const ValueT &V)

iterator find(const_arg_type_t< ValueT > V)

An efficient, type-erasing, non-owning reference to a callable.

This class implements an extremely fast bulk output stream that can only output to a stream.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

Simplify each loop in a loop nest recursively.

std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)

Returns a loop's estimated trip count based on branch weight metadata.

void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)

Perform some cleanup and simplifications on loops after unrolling.

void initializeLoopUnrollPass(PassRegistry &)

void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)

auto successors(const MachineBasicBlock *BB)

@ Runtime

Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...

bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)

Put a loop nest into LCSSA form.

std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)

Create a new loop identifier for a loop created from a loop transformation.

bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)

OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)

Wrapper function around std::transform to apply a function to a range and store the result elsewhere.

TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)

CallBase * getLoopConvergenceHeart(const Loop *TheLoop)

Find the convergence heart of the loop.

TransformationMode hasUnrollAndJamTransformation(const Loop *L)

cl::opt< bool > ForgetSCEVInLoopUnroll

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

cl::opt< unsigned > SCEVCheapExpansionBudget

TransformationMode hasUnrollTransformation(const Loop *L)

LoopUnrollResult

Represents the result of a UnrollLoop invocation.

@ PartiallyUnrolled

The loop was partially unrolled – we still have a loop, but with a smaller trip count.

@ Unmodified

The loop was not modified.

@ FullyUnrolled

The loop was fully unrolled into straight-line code.

bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)

void getLoopAnalysisUsage(AnalysisUsage &AU)

Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.

const char *const LLVMLoopUnrollFollowupAll

TransformationMode

The mode sets how eager a transformation should be applied.

@ TM_ForcedByUser

The transformation was directed by the user, e.g.

@ TM_Disable

The transformation should not be applied.

@ TM_Enable

The transformation should be applied without considering a cost model.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)

Utility that implements appending of loops onto a worklist given a range.

TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)

Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

const char *const LLVMLoopUnrollFollowupRemainder

PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

const char *const LLVMLoopUnrollFollowupUnrolled

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)

Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...

LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)

Unroll the given loop by Count.

bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)

VMap is the value-map that maps instructions from the original loop to instructions in the last peele...

Implement std::hash so that hash_code can be used in STL containers.

Utility to calculate the size and a few similar metrics for a set of basic blocks.

static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)

Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).

An information struct used to provide DenseMap with the various necessary components for a given valu...

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI

bool OnlyWhenForced

If false, use a cost model to determine whether unrolling of a loop is profitable.

const bool ForgetSCEV

If true, forget all loops when unrolling.

std::optional< unsigned > FullUnrollMaxCount

std::optional< bool > AllowPartial

std::optional< bool > AllowRuntime

std::optional< bool > AllowProfileBasedPeeling

std::optional< bool > AllowPeeling

std::optional< bool > AllowUpperBound

A CRTP mix-in to automatically provide informational APIs needed for passes.

bool PeelProfiledIterations

Allow peeling basing on profile.

unsigned PeelCount

A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...

Parameters that control the generic loop unrolling transformation.

unsigned Count

A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...

bool UpperBound

Allow using trip count upper bound to unroll loops.

unsigned Threshold

The cost threshold for the unrolled loop.

bool Force

Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).

unsigned PartialOptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...

unsigned DefaultUnrollRuntimeCount

Default unroll count for loops with run-time trip count.

unsigned MaxPercentThresholdBoost

If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...

unsigned SCEVExpansionBudget

Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.

unsigned UnrollAndJamInnerLoopThreshold

Threshold for unroll and jam, for inner loop size.

unsigned MaxIterationsCountToAnalyze

Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...

bool AllowRemainder

Allow generation of a loop remainder (extra iterations after unroll).

bool UnrollAndJam

Allow unroll and jam. Used to enable unroll and jam for the target.

bool UnrollRemainder

Allow unrolling of all the iterations of the runtime loop remainder.

unsigned FullUnrollMaxCount

Set the maximum unrolling factor for full unrolling.

unsigned PartialThreshold

The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...

unsigned OptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).

bool AllowExpensiveTripCount

Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...

unsigned MaxUpperBound

Set the maximum upper bound of trip count.

const Instruction * Heart

bool AllowExpensiveTripCount

unsigned SCEVExpansionBudget