LLVM: lib/Transforms/Scalar/LoopUnrollPass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

62#include

63#include

64#include

65#include

66#include

67#include

68#include

69#include

70

71using namespace llvm;

72

73#define DEBUG_TYPE "loop-unroll"

74

77 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"

78 " the current top-most loop. This is sometimes preferred to reduce"

79 " compile time."));

80

83 cl::desc("The cost threshold for loop unrolling"));

84

88 cl::desc("The cost threshold for loop unrolling when optimizing for "

89 "size"));

90

92 "unroll-partial-threshold", cl::Hidden,

93 cl::desc("The cost threshold for partial loop unrolling"));

94

97 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "

98 "to the threshold when aggressively unrolling a loop due to the "

99 "dynamic cost savings. If completely unrolling a loop will reduce "

100 "the total runtime from X to Y, we boost the loop unroll "

101 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "

102 "X/Y). This limit avoids excessive code bloat."));

103

106 cl::desc("Don't allow loop unrolling to simulate more than this number of "

107 "iterations when checking full unroll profitability"));

108

111 cl::desc("Use this unroll count for all loops including those with "

112 "unroll_count pragma values, for testing purposes"));

113

116 cl::desc("Set the max unroll count for partial and runtime unrolling, for"

117 "testing purposes"));

118

122 "Set the max unroll count for full unrolling, for testing purposes"));

123

126 cl::desc("Allows loops to be partially unrolled until "

127 "-unroll-threshold loop size is reached."));

128

130 "unroll-allow-remainder", cl::Hidden,

131 cl::desc("Allow generation of a loop remainder (extra iterations) "

132 "when unrolling a loop."));

133

136 cl::desc("Unroll loops with run-time trip counts"));

137

141 "The max of trip count upper bound that is considered in unrolling"));

142

145 cl::desc("Unrolled size limit for loops with an unroll(full) or "

146 "unroll_count pragma."));

147

150 cl::desc("If the runtime tripcount for the loop is lower than the "

151 "threshold, the loop is considered as flat and will be less "

152 "aggressively unrolled."));

153

156 cl::desc("Allow the loop remainder to be unrolled."));

157

158

159

160

162 "unroll-revisit-child-loops", cl::Hidden,

163 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "

164 "This shouldn't typically be needed as child loops (or their "

165 "clones) were already visited."));

166

169 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "

170 "optimizations"));

174 cl::desc("Default threshold (max size of unrolled "

175 "loop), used in all but O3 optimizations"));

176

178 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,

179 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));

180

181

182

183

/// Magic value for the unrolling Threshold parameter, initialized to a
/// numeric-limits maximum.
/// NOTE(review): the template argument of std::numeric_limits was lost in this
/// listing; presumably `unsigned`, matching the declared type -- confirm.
184static const unsigned NoThreshold = std::numeric_limits::max();

185

186

187

192 std::optional UserThreshold, std::optional UserCount,

193 std::optional UserAllowPartial, std::optional UserRuntime,

194 std::optional UserUpperBound,

195 std::optional UserFullUnrollMaxCount) {

197

198

207 UP.MaxCount = std::numeric_limits::max();

216 UP.Force = false;

224

225

226 TTI.getUnrollingPreferences(L, SE, UP, &ORE);

227

228

229 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||

230

234 if (OptForSize) {

238 }

239

240

265

266

267 if (UserThreshold) {

270 }

271 if (UserCount)

272 UP.Count = *UserCount;

273 if (UserAllowPartial)

274 UP.Partial = *UserAllowPartial;

275 if (UserRuntime)

276 UP.Runtime = *UserRuntime;

277 if (UserUpperBound)

279 if (UserFullUnrollMaxCount)

281

282 return UP;

283}

284

285namespace {

286

287

288

289

290

291

292

/// Per-iteration bookkeeping for one instruction during the unroll cost
/// simulation.
///
/// NOTE(review): other code in this file brace-initializes this struct with
/// four values and reads a member named `I`, so a leading instruction-pointer
/// member appears to be missing from this listing (source line 294 is not
/// shown) -- confirm against the upstream file.
293struct UnrolledInstState {

// Iteration index, stored as a signed 30-bit bit-field.
295 int Iteration : 30;

// Recorded from the analyzer's verdict when the entry is created; entries
// with this bit set add nothing to the unrolled cost.
296 unsigned IsFree : 1;

// Set once the recursive cost walk has processed this entry, so the entry is
// skipped if it is reached again.
297 unsigned IsCounted : 1;

298};

299

300

/// Key traits for UnrolledInstState: sentinel keys, hashing and equality.
///
/// Hashing and equality look only at the (I, Iteration) pair; the IsFree and
/// IsCounted bits are ignored, so a probe built with both flags zeroed matches
/// a stored entry whatever its flags are.
301struct UnrolledInstStateKeyInfo {

// Traits for the bare instruction pointer; source of the sentinel pointers.
302 using PtrInfo = DenseMapInfo<Instruction *>;

// Traits for the (instruction, iteration) pair; source of hash and equality.
303 using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;

304

// Sentinel state: PtrInfo's empty-key pointer with all other fields zero.
305 static inline UnrolledInstState getEmptyKey() {

306 return {PtrInfo::getEmptyKey(), 0, 0, 0};

307 }

308

// Sentinel state: PtrInfo's tombstone pointer with all other fields zero.
309 static inline UnrolledInstState getTombstoneKey() {

310 return {PtrInfo::getTombstoneKey(), 0, 0, 0};

311 }

312

// Hash of the (I, Iteration) pair only.
313 static inline unsigned getHashValue(const UnrolledInstState &S) {

314 return PairInfo::getHashValue({S.I, S.Iteration});

315 }

316

// Two states compare equal when their (I, Iteration) pairs compare equal.
317 static inline bool isEqual(const UnrolledInstState &LHS,

318 const UnrolledInstState &RHS) {

319 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});

320 }

321};

322

/// The two cost totals produced by the full-unroll profitability analysis.
/// The boosting-factor helper in this file compares them as
/// 100 * RolledDynamicCost / UnrolledCost.
323struct EstimatedUnrollCost {

324

// Estimated cost of the unrolled form: accumulated only for non-free
// instructions reached by the recursive cost walk, using their simplified
// operands.
325 unsigned UnrolledCost;

326

327

328

// Estimated cost of executing the loop as-is: accumulated for every analyzed
// (non-ephemeral) instruction in every simulated iteration.
329 unsigned RolledDynamicCost;

330};

331

/// Immutable bundle of the explicit unrolling requests that apply to a loop.
/// NOTE(review): the code that computes the three Pragma* values is elided
/// from this listing; presumably they come from the loop's unroll metadata
/// helpers -- confirm against the upstream file.
332struct PragmaInfo {

// Arguments, in order: UserUnrollCount, PragmaFullUnroll, PragmaCount,
// PragmaEnableUnroll.
333 PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)

334 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),

335 PragmaEnableUnroll(PEU) {}

// True when the UnrollCount command-line option occurred at least once.
336 const bool UserUnrollCount;

// When set and the trip count is non-zero, the pragma path asks for the
// trip count as the unroll factor (after a size check whose condition is
// elided from this listing).
337 const bool PragmaFullUnroll;

// Requested unroll count; zero means no count was requested. A non-zero
// count is honored when remainders are allowed or it divides the trip
// multiple.
338 const unsigned PragmaCount;

// Considered together with the max trip count when the exact trip count is
// unknown (the rest of that condition is elided from this listing).
339 const bool PragmaEnableUnroll;

340};

341

342}

343

344

345

346

347

348

349

350

351

352

353

354

355

356

361 unsigned MaxIterationsCountToAnalyze) {

362

363

364

365 assert(MaxIterationsCountToAnalyze <

366 (unsigned)(std::numeric_limits::max() / 2) &&

367 "The unroll iterations max is too large!");

368

369

370

371 if (!L->isInnermost())

372 return std::nullopt;

373

374

375 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)

376 return std::nullopt;

377

382

383

384

386

387

388

389

390

391

393

394

395

396

397

399

400

401

403

404

406

407

408 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {

409 assert(Iteration >= 0 && "Cannot have a negative iteration!");

410 assert(CostWorklist.empty() && "Must start with an empty cost list");

411 assert(PHIUsedList.empty() && "Must start with an empty phi used list");

417 for (;; --Iteration) {

418 do {

420

421

422

423 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});

424 if (CostIter == InstCostMap.end())

425

426

427

428 continue;

429 auto &Cost = *CostIter;

430 if (Cost.IsCounted)

431

432 continue;

433

434

435 Cost.IsCounted = true;

436

437

439 if (PhiI->getParent() == L->getHeader()) {

440 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "

441 "inherently simplify during unrolling.");

442 if (Iteration == 0)

443 continue;

444

445

446

447

449 PhiI->getIncomingValueForBlock(L->getLoopLatch())))

450 if (L->contains(OpI))

452 continue;

453 }

454

455

456 if (!Cost.IsFree) {

457

459 transform(I->operands(), std::back_inserter(Operands),

461 if (auto Res = SimplifiedValues.lookup(Op))

462 return Res;

463 return Op;

464 });

465 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);

466 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "

467 << Iteration << "): ");

469 }

470

471

472

473

474 for (Value *Op : I->operands()) {

475

476

478 if (!OpI || !L->contains(OpI))

479 continue;

480

481

483 }

484 } while (!CostWorklist.empty());

485

486 if (PHIUsedList.empty())

487

488 break;

489

490 assert(Iteration > 0 &&

491 "Cannot track PHI-used values past the first iteration!");

492 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());

493 PHIUsedList.clear();

494 }

495 };

496

497

498

499 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");

500 assert(L->isLCSSAForm(DT) &&

501 "Must have loops in LCSSA form to track live-out values.");

502

503 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");

504

506 L->getHeader()->getParent()->hasMinSize() ?

508

509

510

511

512 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {

513 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");

514

515

516

519 if (PHI)

520 break;

521

522

523

525 PHI->getNumIncomingValues() == 2 &&

526 "Must have an incoming value only for the preheader and the latch.");

527

528 Value *V = PHI->getIncomingValueForBlock(

529 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());

530 if (Iteration != 0 && SimplifiedValues.count(V))

531 V = SimplifiedValues.lookup(V);

533 }

534

535

536 SimplifiedValues.clear();

537 while (!SimplifiedInputValues.empty())

539

541

542 BBWorklist.clear();

543 BBWorklist.insert(L->getHeader());

544

545 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {

547

548

549

550

552

553

554 if (EphValues.count(&I))

555 continue;

556

557

558

559 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);

560

561

562

563

564 bool IsFree = Analyzer.visit(I);

565 bool Inserted = InstCostMap.insert({&I, (int)Iteration,

566 (unsigned)IsFree,

567 false}).second;

568 (void)Inserted;

569 assert(Inserted && "Cannot have a state for an unvisited instruction!");

570

571 if (IsFree)

572 continue;

573

574

575

577 const Function *Callee = CI->getCalledFunction();

578 if (!Callee || TTI.isLoweredToCall(Callee)) {

579 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");

580 return std::nullopt;

581 }

582 }

583

584

585

586 if (I.mayHaveSideEffects())

587 AddCostRecursively(I, Iteration);

588

589

590 if (UnrolledCost > MaxUnrolledLoopSize) {

591 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"

592 << " UnrolledCost: " << UnrolledCost

593 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize

594 << "\n");

595 return std::nullopt;

596 }

597 }

598

600

601 auto getSimplifiedConstant = [&](Value *V) -> Constant * {

602 if (SimplifiedValues.count(V))

603 V = SimplifiedValues.lookup(V);

605 };

606

607

608

611 if (BI->isConditional()) {

612 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {

613

615 KnownSucc = BI->getSuccessor(0);

618 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);

619 }

620 }

622 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {

623

625 KnownSucc = SI->getSuccessor(0);

628 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();

629 }

630 }

631 if (KnownSucc) {

632 if (L->contains(KnownSucc))

633 BBWorklist.insert(KnownSucc);

634 else

635 ExitWorklist.insert({BB, KnownSucc});

636 continue;

637 }

638

639

641 if (L->contains(Succ))

642 BBWorklist.insert(Succ);

643 else

644 ExitWorklist.insert({BB, Succ});

645 AddCostRecursively(*TI, Iteration);

646 }

647

648

649

650 if (UnrolledCost == RolledDynamicCost) {

651 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"

652 << " UnrolledCost: " << UnrolledCost << "\n");

653 return std::nullopt;

654 }

655 }

656

657 while (!ExitWorklist.empty()) {

659 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();

660

663 if (!PN)

664 break;

665

666 Value *Op = PN->getIncomingValueForBlock(ExitingBB);

668 if (L->contains(OpI))

669 AddCostRecursively(*OpI, TripCount - 1);

670 }

671 }

672

674 "All instructions must have a valid cost, whether the "

675 "loop is rolled or unrolled.");

676

678 << "UnrolledCost: " << UnrolledCost << ", "

679 << "RolledDynamicCost: " << RolledDynamicCost << "\n");

682}

683

689 Metrics.analyzeBasicBlock(BB, TTI, EphValues, false,

690 L);

692 NotDuplicatable = Metrics.notDuplicatable;

694 LoopSize = Metrics.NumInsts;

698

699

700

701

702

703

704

705 if (LoopSize.isValid() && LoopSize < BEInsns + 1)

706

707 LoopSize = BEInsns + 1;

708}

709

713 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");

714 return false;

715 default:

716 break;

717 }

718 if (!LoopSize.isValid()) {

719 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");

720 return false;

721 }

722 if (NotDuplicatable) {

723 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");

724 return false;

725 }

726 return true;

727}

728

731 unsigned CountOverwrite) const {

732 unsigned LS = LoopSize.getValue();

733 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");

734 if (CountOverwrite)

736 else

738}

739

740

741

742

744 if (MDNode *LoopID = L->getLoopID())

746 return nullptr;

747}

748

749

753

754

755

759

760

764

765

766

769 if (MD) {

771 "Unroll count hint metadata should have two operands.");

774 assert(Count >= 1 && "Unroll count must be positive.");

776 }

777 return 0;

778}

779

780

781

782

783

784

786 unsigned MaxPercentThresholdBoost) {

787 if (Cost.RolledDynamicCost >= std::numeric_limits::max() / 100)

788 return 100;

789 else if (Cost.UnrolledCost != 0)

790

791 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,

792 MaxPercentThresholdBoost);

793 else

794 return MaxPercentThresholdBoost;

795}

796

797static std::optional

799 const unsigned TripMultiple, const unsigned TripCount,

802

803

804

805

806 if (PInfo.UserUnrollCount) {

810 }

811

812

813 if (PInfo.PragmaCount > 0) {

814 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))

815 return PInfo.PragmaCount;

816 }

817

818 if (PInfo.PragmaFullUnroll && TripCount != 0) {

819

820

821

823 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");

824 return std::nullopt;

825 }

826

827 return TripCount;

828 }

829

830 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&

832 return MaxTripCount;

833

834

835 return std::nullopt;

836}

837

843 assert(FullUnrollTripCount && "should be non-zero!");

844

846 return std::nullopt;

847

848

849

851 return FullUnrollTripCount;

852

853

854

855

857 L, FullUnrollTripCount, DT, SE, EphValues, TTI,

860 unsigned Boost =

862 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)

863 return FullUnrollTripCount;

864 }

865 return std::nullopt;

866}

867

868static std::optional

872

873 if (!TripCount)

874 return std::nullopt;

875

877 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "

878 << "-unroll-allow-partial not given\n");

879 return 0;

880 }

883 count = TripCount;

885

891 while (count != 0 && TripCount % count != 0)

894

895

896

897

899 while (count != 0 &&

902 }

905 }

906 } else {

907 count = TripCount;

908 }

911

912 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");

913

915}

916

917

918

919

920

921

922

923

932

934

935 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;

939

940 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||

941 PragmaEnableUnroll || UserUnrollCount;

942

943 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,

944 PragmaEnableUnroll);

945

946

948 if (UnrollCount.getNumOccurrences() > 0) {

950 "explicit unroll count");

951 }

954 return true;

955 }

956

957

958

959 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,

960 MaxTripCount, UCE, UP)) {

961 UP.Count = *UnrollFactor;

962

963 if (UserUnrollCount || (PragmaCount > 0)) {

966 }

967 UP.Runtime |= (PragmaCount > 0);

968 return ExplicitUnroll;

969 } else {

970 if (ExplicitUnroll && TripCount != 0) {

971

972

973

977 }

978 }

979

980

981

983 if (TripCount) {

984 UP.Count = TripCount;

986 TripCount, UCE, UP)) {

987 UP.Count = *UnrollFactor;

988 UseUpperBound = false;

989 return ExplicitUnroll;

990 }

991 }

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&

1007 UP.Count = MaxTripCount;

1009 MaxTripCount, UCE, UP)) {

1010 UP.Count = *UnrollFactor;

1011 UseUpperBound = true;

1012 return ExplicitUnroll;

1013 }

1014 }

1015

1016

1021 return ExplicitUnroll;

1022 }

1023

1024

1025

1026 if (TripCount)

1027 UP.Partial |= ExplicitUnroll;

1028

1029

1030

1031 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {

1032 UP.Count = *UnrollFactor;

1033

1034 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&

1035 UP.Count != TripCount)

1036 ORE->emit([&]() {

1038 "FullUnrollAsDirectedTooLarge",

1039 L->getStartLoc(), L->getHeader())

1040 << "Unable to fully unroll loop as directed by unroll pragma "

1041 "because "

1042 "unrolled size is too large.";

1043 });

1044

1046 if (UP.Count == 0) {

1047 if (PragmaEnableUnroll)

1048 ORE->emit([&]() {

1050 "UnrollAsDirectedTooLarge",

1051 L->getStartLoc(), L->getHeader())

1052 << "Unable to unroll loop as directed by unroll(enable) "

1053 "pragma "

1054 "because unrolled size is too large.";

1055 });

1056 }

1057 }

1058 return ExplicitUnroll;

1059 }

1060 assert(TripCount == 0 &&

1061 "All cases when TripCount is constant should be covered here.");

1062 if (PragmaFullUnroll)

1063 ORE->emit([&]() {

1065 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",

1066 L->getStartLoc(), L->getHeader())

1067 << "Unable to fully unroll loop as directed by unroll(full) "

1068 "pragma "

1069 "because loop has a runtime trip count.";

1070 });

1071

1072

1073

1076 return false;

1077 }

1078

1079

1082 return false;

1083 }

1084

1085

1086 if (L->getHeader()->getParent()->hasProfileData()) {

1089 return false;

1090 else

1092 }

1093 }

1094 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;

1097 dbgs() << " will not try to unroll loop with runtime trip count "

1098 << "-unroll-runtime not given\n");

1100 return false;

1101 }

1102 if (UP.Count == 0)

1104

1105

1106

1107 while (UP.Count != 0 &&

1110

1111#ifndef NDEBUG

1112 unsigned OrigCount = UP.Count;

1113#endif

1114

1116 while (UP.Count != 0 && TripMultiple % UP.Count != 0)

1119 dbgs() << "Remainder loop is restricted (that could architecture "

1120 "specific or because the loop contains a convergent "

1121 "instruction), so unroll count must divide the trip "

1122 "multiple, "

1123 << TripMultiple << ". Reducing unroll count from " << OrigCount

1124 << " to " << UP.Count << ".\n");

1125

1126 using namespace ore;

1127

1129 ORE->emit([&]() {

1131 "DifferentUnrollCountFromDirected",

1132 L->getStartLoc(), L->getHeader())

1133 << "Unable to unroll loop the number of times directed by "

1134 "unroll_count pragma because remainder loop is restricted "

1135 "(that could architecture specific or because the loop "

1136 "contains a convergent instruction) and so must have an "

1137 "unroll "

1138 "count that divides the loop trip multiple of "

1139 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "

1140 << NV("UnrollCount", UP.Count) << " time(s).";

1141 });

1142 }

1143

1146

1147 if (MaxTripCount && UP.Count > MaxTripCount)

1148 UP.Count = MaxTripCount;

1149

1151 << "\n");

1152 if (UP.Count < 2)

1154 return ExplicitUnroll;

1155}

1156

1162 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,

1163 std::optional ProvidedCount,

1164 std::optional ProvidedThreshold,

1165 std::optional ProvidedAllowPartial,

1166 std::optional ProvidedRuntime,

1167 std::optional ProvidedUpperBound,

1168 std::optional ProvidedAllowPeeling,

1169 std::optional ProvidedAllowProfileBasedPeeling,

1170 std::optional ProvidedFullUnrollMaxCount,

1172

1174 << L->getHeader()->getParent()->getName() << "] Loop %"

1175 << L->getHeader()->getName() << "\n");

1179

1180

1181

1182

1183

1184 Loop *ParentL = L->getParentLoop();

1185 if (ParentL != nullptr &&

1188 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"

1189 << " llvm.loop.unroll_and_jam.\n");

1191 }

1192

1193

1194

1195

1200 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");

1202 }

1203

1204 if (!L->isLoopSimplifyForm()) {

1206 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");

1208 }

1209

1210

1211

1212 if (OnlyWhenForced && !(TM & TM_Enable))

1214

1215 bool OptForSize = L->getHeader()->getParent()->hasOptSize();

1217 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,

1218 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,

1219 ProvidedFullUnrollMaxCount);

1221 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);

1222

1223

1224

1226 !OptForSize)

1228

1231

1234 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");

1236 }

1237

1239 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");

1240

1241

1242

1243 if (OptForSize)

1245

1247 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");

1249 }

1250

1251

1252

1253

1254

1255

1256 unsigned TripCount = 0;

1257 unsigned TripMultiple = 1;

1259 L->getExitingBlocks(ExitingBlocks);

1260 for (BasicBlock *ExitingBlock : ExitingBlocks)

1262 if (!TripCount || TC < TripCount)

1263 TripCount = TripMultiple = TC;

1264

1265 if (!TripCount) {

1266

1267

1268

1269 BasicBlock *ExitingBlock = L->getLoopLatch();

1270 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))

1271 ExitingBlock = L->getExitingBlock();

1272 if (ExitingBlock)

1274 }

1275

1276

1277

1278

1279

1280

1281

1282

1284

1285

1286

1287 unsigned MaxTripCount = 0;

1288 bool MaxOrZero = false;

1289 if (!TripCount) {

1292 }

1293

1294

1295

1296 bool UseUpperBound = false;

1298 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,

1299 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);

1302

1304

1306 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");

1307 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()

1308 << " with iteration count " << PP.PeelCount << "!\n");

1309 ORE.emit([&]() {

1311 L->getHeader())

1313 << " iterations";

1314 });

1315

1318 VMap)) {

1320

1321

1323 L->setLoopAlreadyUnrolled();

1325 }

1327 }

1328

1329

1330 if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||

1331 UP.Count < TripCount || UP.Count < MaxTripCount)) {

1333 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");

1335 }

1336

1337

1338

1339

1340

1341

1342 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;

1343

1344

1345 MDNode *OrigLoopID = L->getLoopID();

1346

1347

1348 Loop *RemainderLoop = nullptr;

1361 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);

1364

1365 if (RemainderLoop) {

1366 std::optional<MDNode *> RemainderLoopID =

1369 if (RemainderLoopID)

1370 RemainderLoop->setLoopID(*RemainderLoopID);

1371 }

1372

1374 std::optional<MDNode *> NewLoopID =

1377 if (NewLoopID) {

1378 L->setLoopID(*NewLoopID);

1379

1380

1381

1382 return UnrollResult;

1383 }

1384 }

1385

1386

1387

1389 L->setLoopAlreadyUnrolled();

1390

1391 return UnrollResult;

1392}

1393

1394namespace {

1395

/// Loop pass wrapper around the unrolling driver in this file.
///
/// Holds the optimization level, two behavior flags and a set of optional
/// overrides, and forwards all of them to the driver from runOnLoop().
/// NOTE(review): several lines of this class (template arguments, the driver
/// call's first line, and some statements) were dropped from this listing;
/// comments below only describe what is visible.
1396class LoopUnroll : public LoopPass {

1397public:

// Pass identifier; handed to the LoopPass base in the constructor.
1398 static char ID;

1399

// Optimization level forwarded to the driver.
1400 int OptLevel;

1401

1402

1403

1404

// Forwarded to the driver, which bails out when this is set and the loop's
// transformation mode does not have TM_Enable.
1405 bool OnlyWhenForced;

1406

1407

1408

1409

// Forwarded to the driver unchanged.
1410 bool ForgetAllSCEV;

1411

// Optional overrides; an empty optional means "not provided".
// NOTE(review): the element types of these optionals were stripped from this
// listing.
1412 std::optional ProvidedCount;

1413 std::optional ProvidedThreshold;

1414 std::optional ProvidedAllowPartial;

1415 std::optional ProvidedRuntime;

1416 std::optional ProvidedUpperBound;

1417 std::optional ProvidedAllowPeeling;

1418 std::optional ProvidedAllowProfileBasedPeeling;

1419 std::optional ProvidedFullUnrollMaxCount;

1420

// Stores every argument in the matching member. Note the parameter order is
// Threshold before Count, while the members (and the forwarding call in
// runOnLoop) list Count before Threshold.
1421 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,

1422 bool ForgetAllSCEV = false,

1423 std::optional Threshold = std::nullopt,

1424 std::optional Count = std::nullopt,

1425 std::optional AllowPartial = std::nullopt,

1426 std::optional Runtime = std::nullopt,

1427 std::optional UpperBound = std::nullopt,

1428 std::optional AllowPeeling = std::nullopt,

1429 std::optional AllowProfileBasedPeeling = std::nullopt,

1430 std::optional ProvidedFullUnrollMaxCount = std::nullopt)

1431 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),

1432 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),

1433 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),

1434 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),

1435 ProvidedAllowPeeling(AllowPeeling),

1436 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),

1437 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {

// NOTE(review): the constructor body (source line 1438) is elided here.
1439 }

1440

// Runs the unrolling driver on L and returns true when the loop was modified.
1441 bool runOnLoop(Loop *L, LPPassManager &LPM) override {

// Nothing to do (and nothing changed) when the loop is to be skipped.
1442 if (skipLoop(L))

1443 return false;

1444

1445 Function &F = *L->getHeader()->getParent();

1446

// Fetch the analyses the driver needs.
// NOTE(review): the getAnalysis template arguments were stripped from this
// listing.
1447 auto &DT = getAnalysis().getDomTree();

1448 LoopInfo *LI = &getAnalysis().getLoopInfo();

1449 ScalarEvolution &SE = getAnalysis().getSE();

1450 const TargetTransformInfo &TTI =

1451 getAnalysis().getTTI(F);

1452 auto &AC = getAnalysis().getAssumptionCache(F);

1453

1454

1455

// A remark emitter is constructed locally for this function.
1456 OptimizationRemarkEmitter ORE(&F);

1457 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

1458

// Arguments of the driver call whose first line (declaring `Result`) is
// elided from this listing. The two nullptr arguments sit where the other
// caller in this file passes BFI and PSI, and the literal `false` sits in
// the OnlyFullUnroll position.
1460 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,

1461 false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,

1462 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,

1463 ProvidedUpperBound, ProvidedAllowPeeling,

1464 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);

1465

// NOTE(review): the statement guarded by this condition (source line 1467)
// is elided from this listing.
1466 if (Result == LoopUnrollResult::FullyUnrolled)

1468

// Any result other than Unmodified counts as a change.
1469 return Result != LoopUnrollResult::Unmodified;

1470 }

1471

1472

1473

// Declares the analyses this pass requires.
// NOTE(review): the addRequired template arguments, and at least one further
// statement (source line 1479), are missing from this listing.
1474 void getAnalysisUsage(AnalysisUsage &AU) const override {

1475 AU.addRequired();

1476 AU.addRequired();

1477

1478

1480 }

1481};

1482

1483}

1484

/// Out-of-class definition of the static pass-ID member declared in
/// LoopUnroll; the constructor passes `ID` to the LoopPass base.
1485char LoopUnroll::ID = 0;

1486

1491INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)

1492

1494 bool ForgetAllSCEV, int Threshold, int Count,

1495 int AllowPartial, int Runtime, int UpperBound,

1496 int AllowPeeling) {

1497

1498

1499

1500 return new LoopUnroll(

1501 OptLevel, OnlyWhenForced, ForgetAllSCEV,

1502 Threshold == -1 ? std::nullopt : std::optional(Threshold),

1503 Count == -1 ? std::nullopt : std::optional(Count),

1504 AllowPartial == -1 ? std::nullopt : std::optional(AllowPartial),

1505 Runtime == -1 ? std::nullopt : std::optional(Runtime),

1506 UpperBound == -1 ? std::nullopt : std::optional(UpperBound),

1507 AllowPeeling == -1 ? std::nullopt : std::optional(AllowPeeling));

1508}

1509

1513

1514

1515

1517

1518

1519

1520 Loop *ParentL = L.getParentLoop();

1522 if (ParentL)

1524 else

1526

1527 std::string LoopName = std::string(L.getName());

1528

1531 nullptr, nullptr,

1532 true, OptLevel, true,

1533 OnlyWhenForced, ForgetSCEV, std::nullopt,

1534 std::nullopt, false,

1535 false, false,

1536 true,

1537 false,

1538 std::nullopt) !=

1542

1543

1544#ifndef NDEBUG

1545 if (ParentL)

1547#endif

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565 bool IsCurrentLoopValid = false;

1567 if (ParentL)

1569 else

1572 if (SibLoop == &L) {

1573 IsCurrentLoopValid = true;

1574 return true;

1575 }

1576

1577

1578 return OldLoops.contains(SibLoop);

1579 });

1581

1582 if (!IsCurrentLoopValid) {

1584 } else {

1585

1587

1590 }

1591 }

1592

1594}

1595

1599

1600

1609

1611 if (auto *LAMProxy = AM.getCachedResult(F))

1612 LAM = &LAMProxy->getManager();

1613

1617 auto *BFI = (PSI && PSI->hasProfileSummary()) ?

1619

1621

1622

1623

1624

1625

1626

1627 for (const auto &L : LI) {

1629 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false );

1631 }

1632

1633

1634

1637

1638 while (!Worklist.empty()) {

1639

1640

1641

1642

1644#ifndef NDEBUG

1645 Loop *ParentL = L.getParentLoop();

1646#endif

1647

1648

1649

1650

1651 std::optional LocalAllowPeeling = UnrollOpts.AllowPeeling;

1652 if (PSI && PSI->hasHugeWorkingSetSize())

1653 LocalAllowPeeling = false;

1654 std::string LoopName = std::string(L.getName());

1655

1656

1658 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,

1659 true, UnrollOpts.OptLevel, false,

1660 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,

1661 std::nullopt,

1662 std::nullopt, UnrollOpts.AllowPartial,

1663 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,

1664 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,

1665 &AA);

1667

1668

1669#ifndef NDEBUG

1672#endif

1673

1674

1676 LAM->clear(L, LoopName);

1677 }

1678

1681

1683}

1684

1688 OS, MapClassName2PassName);

1689 OS << '<';

1690 if (UnrollOpts.AllowPartial != std::nullopt)

1691 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";

1692 if (UnrollOpts.AllowPeeling != std::nullopt)

1693 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";

1694 if (UnrollOpts.AllowRuntime != std::nullopt)

1695 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";

1696 if (UnrollOpts.AllowUpperBound != std::nullopt)

1697 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";

1698 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)

1699 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")

1700 << "profile-peeling;";

1701 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)

1702 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';

1703 OS << 'O' << UnrollOpts.OptLevel;

1704 OS << '>';

1705}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

This file defines DenseMapInfo traits for DenseMap.

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

This file provides various utilities for inspecting and working with the control flow graph in LLVM IR.

This header defines various interfaces for pass management in LLVM.

This header provides classes for managing per-loop analyses.

This header provides classes for managing a pipeline of passes over loops in LLVM IR.

static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)

static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))

static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))

static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))

static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))

static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))

static bool hasUnrollFullPragma(const Loop *L)

Definition LoopUnrollPass.cpp:750

static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))

static unsigned unrollCountPragmaValue(const Loop *L)

Definition LoopUnrollPass.cpp:767

static bool hasUnrollEnablePragma(const Loop *L)

Definition LoopUnrollPass.cpp:756

static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))

static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))

static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

Definition LoopUnrollPass.cpp:838

static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)

Figure out if the loop is worth full unrolling.

Definition LoopUnrollPass.cpp:357

static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))

static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))

static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

Definition LoopUnrollPass.cpp:869

static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))

static const unsigned NoThreshold

A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...

Definition LoopUnrollPass.cpp:184

static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)

Definition LoopUnrollPass.cpp:798

static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))

static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))

static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))

static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)

Definition LoopUnrollPass.cpp:1158

static bool hasRuntimeUnrollDisablePragma(const Loop *L)

Definition LoopUnrollPass.cpp:761

static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)

Definition LoopUnrollPass.cpp:785

static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))

static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))

static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))

static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))

static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This pass exposes codegen information to IR-level passes.

A manager for alias analyses.

PassT::Result * getCachedResult(IRUnitT &IR) const

Get the cached result of an analysis pass for a given IR unit.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

A function analysis which provides an AssumptionCache.

An immutable pass that tracks lazily created AssumptionCache objects.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Analysis pass which computes BlockFrequencyInfo.

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

Conditional or Unconditional Branch instruction.

This is the shared class of boolean and integer constants.

This is an important base class in LLVM.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Implements a dense probed hash-table based set.

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

bool hasMinSize() const

Optimize this function for minimum size (-Oz).

CostType getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

LLVM_ABI const Function * getFunction() const

Return the function this instruction belongs to.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

void addChildLoops(ArrayRef< Loop * > NewChildLoops)

Loop passes should use this method to indicate they have added new child loops of the current loop.

void markLoopAsDeleted(Loop &L, llvm::StringRef Name)

Loop passes should use this method to indicate they have deleted a loop from the nest.

void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)

Loop passes should use this method to indicate they have added new sibling loops to the current loop.

void markLoopAsDeleted(Loop &L)

Analysis pass that exposes the LoopInfo for a function.

void verifyLoop() const

Verify loop structure.

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Definition LoopUnrollPass.cpp:1510

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition LoopUnrollPass.cpp:1596

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)

Definition LoopUnrollPass.cpp:1685

Represents a single loop in the control flow graph.

void setLoopID(MDNode *LoopID) const

Set the llvm.loop loop id metadata for this loop.

const MDOperand & getOperand(unsigned I) const

unsigned getNumOperands() const

Return number of MDNode operands.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

bool empty() const

Determine if the PriorityWorklist is empty or not.

An analysis pass based on the new PM to deliver ProfileSummaryInfo.

Analysis providing profile information.

Analysis pass that exposes the ScalarEvolution for a function.

The main scalar evolution driver.

LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)

Returns the largest constant divisor of the trip count as a normal unsigned value,...

LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

LLVM_ABI bool isBackedgeTakenCountMaxOrZero(const Loop *L)

Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...

LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)

Returns the exact trip count of the loop if we can compute it, and the result is a small constant.

size_type size() const

Determine the number of elements in the SetVector.

void clear()

Completely clear the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

value_type pop_back_val()

A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

void insert_range(Range &&R)

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.

A SetVector that performs no allocations if smaller than a certain size.

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

TargetCostKind

The kind of cost model.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

Produce an estimate of the unrolled cost of the specified loop.

ConvergenceKind Convergence

bool ConvergenceAllowsRuntime

LLVM_ABI uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const

Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.

Definition LoopUnrollPass.cpp:729

LLVM_ABI bool canUnroll() const

Whether it is legal to unroll this loop.

Definition LoopUnrollPass.cpp:710

unsigned NumInlineCandidates

LLVM_ABI UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)

Definition LoopUnrollPass.cpp:684

uint64_t getRolledLoopSize() const

void visit(Iterator Start, Iterator End)

LLVM Value Representation.

std::pair< iterator, bool > insert(const ValueT &V)

iterator find(const_arg_type_t< ValueT > V)

An efficient, type-erasing, non-owning reference to a callable.

This class implements an extremely fast bulk output stream that can only output to a stream.

Abstract Attribute helper functions.

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

Add a small namespace to avoid name clashes with the classes used in the streaming interface.

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)

Simplify each loop in a loop nest recursively.

LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)

Return either:

bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)

LLVM_ABI void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)

Perform some cleanup and simplifications on loops after unrolling.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

auto successors(const MachineBasicBlock *BB)

@ Runtime

Detect stack use after return if not disabled at runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...

OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy

Provide the ModuleAnalysisManager to Function proxy.

LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)

Put a loop nest into LCSSA form.

LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)

Create a new loop identifier for a loop created from a loop transformation.

LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

LLVM_ABI Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)

Definition LoopUnrollPass.cpp:1493

AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager

The loop analysis manager.

OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)

Wrapper function around std::transform to apply a function to a range and store the result elsewhere.

LLVM_ABI void initializeLoopUnrollPass(PassRegistry &)

TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)

LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)

Find the convergence heart of the loop.

LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)

cl::opt< bool > ForgetSCEVInLoopUnroll

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)

LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)

Utility that implements appending of loops onto a worklist given a range.

LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget

FunctionAddr VTableAddr Count

LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)

LoopUnrollResult

Represents the result of a UnrollLoop invocation.

@ PartiallyUnrolled

The loop was partially unrolled – we still have a loop, but with a smaller trip count.

@ Unmodified

The loop was not modified.

@ FullyUnrolled

The loop was fully unrolled into straight-line code.

LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)

Definition LoopUnrollPass.cpp:924

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)

Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.

const char *const LLVMLoopUnrollFollowupAll

TransformationMode

The mode sets how eager a transformation should be applied.

@ TM_ForcedByUser

The transformation was directed by the user, e.g.

@ TM_Disable

The transformation should not be applied.

@ TM_Enable

The transformation should be applied without considering a cost model.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

DWARFExpression::Operation Op

LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)

Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...

Definition LoopUnrollPass.cpp:188

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

const char *const LLVMLoopUnrollFollowupRemainder

LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

const char *const LLVMLoopUnrollFollowupUnrolled

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)

VMap is the value-map that maps instructions from the original loop to instructions in the last peele...

LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)

Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...

LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)

Unroll the given loop by Count.

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

Utility to calculate the size and a few similar metrics for a set of basic blocks.

static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)

Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI

A CRTP mix-in to automatically provide informational APIs needed for passes.

bool PeelLast

Peel off the last PeelCount loop iterations.

bool PeelProfiledIterations

Allow peeling based on profile.

unsigned PeelCount

A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...

Parameters that control the generic loop unrolling transformation.

unsigned Count

A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...

bool UpperBound

Allow using trip count upper bound to unroll loops.

unsigned Threshold

The cost threshold for the unrolled loop.

bool Force

Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).

unsigned PartialOptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...

unsigned DefaultUnrollRuntimeCount

Default unroll count for loops with run-time trip count.

unsigned MaxPercentThresholdBoost

If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...

bool RuntimeUnrollMultiExit

Allow runtime unrolling multi-exit loops.

unsigned SCEVExpansionBudget

Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.

bool AddAdditionalAccumulators

Allow unrolling to add parallel reduction phis.

unsigned UnrollAndJamInnerLoopThreshold

Threshold for unroll and jam, for inner loop size.

unsigned MaxIterationsCountToAnalyze

Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...

bool AllowRemainder

Allow generation of a loop remainder (extra iterations after unroll).

bool UnrollAndJam

Allow unroll and jam. Used to enable unroll and jam for the target.

bool UnrollRemainder

Allow unrolling of all the iterations of the runtime loop remainder.

unsigned FullUnrollMaxCount

Set the maximum unrolling factor for full unrolling.

unsigned PartialThreshold

The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...

unsigned OptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).

bool AllowExpensiveTripCount

Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...

unsigned MaxUpperBound

Set the maximum upper bound of trip count.

const Instruction * Heart

bool RuntimeUnrollMultiExit

bool AllowExpensiveTripCount

bool AddAdditionalAccumulators

unsigned SCEVExpansionBudget