LLVM: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp Source File


31using namespace llvm;

32using namespace PatternMatch;

33

34#define LV_NAME "loop-vectorize"

35#define DEBUG_TYPE LV_NAME

36

37static cl::opt<bool>

38 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,

39 cl::desc("Enable if-conversion during vectorization."));

41static cl::opt<bool>

42 AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,

43 cl::desc("Enable recognition of non-constant strided "

44 "pointer induction variables."));

46namespace llvm {

47cl::opt<bool>

48 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,

49 cl::desc("Allow enabling loop hints to reorder "

50 "FP operations during vectorization."));

51}

55static cl::opt<unsigned> VectorizeSCEVCheckThreshold(

56 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,

57 cl::desc("The maximum number of SCEV checks allowed."));

59static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(

60 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,

61 cl::desc("The maximum number of SCEV checks allowed with a "

62 "vectorize(enable) pragma"));

64static cl::opt<LoopVectorizeHints::ScalableForceKind>

65 ForceScalableVectorization(

66 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),

67 cl::Hidden,

68 cl::desc("Control whether the compiler can use scalable vectors to "

69 "vectorize a loop"),

70 cl::values(

71 clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",

72 "Scalable vectorization is disabled."),

73 clEnumValN(

74 LoopVectorizeHints::SK_PreferScalable, "preferred",

75 "Scalable vectorization is available and favored when the "

76 "cost is inconclusive."),

77 clEnumValN(

78 LoopVectorizeHints::SK_PreferScalable, "on",

79 "Scalable vectorization is available and favored when the "

80 "cost is inconclusive.")));

82static cl::opt<bool> EnableHistogramVectorization(

83 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,

84 cl::desc("Enables autovectorization of some loops containing histograms"));

85

86

88

89namespace llvm {

90

91bool LoopVectorizeHints::Hint::validate(unsigned Val) {

92 switch (Kind) {

93 case HK_WIDTH:

95 case HK_INTERLEAVE:

97 case HK_FORCE:

98 return (Val <= 1);

99 case HK_ISVECTORIZED:

100 case HK_PREDICATE:

101 case HK_SCALABLE:

102 return (Val == 0 || Val == 1);

103 }

104 return false;

105}

106

108 bool InterleaveOnlyWhenForced,

112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),

113 Force("vectorize.enable", FK_Undefined, HK_FORCE),

114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),

115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),

116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),

117 TheLoop(L), ORE(ORE) {

118

119 getHintsFromMetadata();

120

121

124

125

126

127

128

129

130

135

136 if (Width.Value)

137

138

139

141 }

142

143

144

148

149

152

153 if (IsVectorized.Value != 1)

154

155

156

157 IsVectorized.Value =

160 << "LV: Interleaving disabled by the pass manager\n");

161}

162

165

167 Context,

168 {MDString::get(Context, "llvm.loop.isvectorized"),

173 {Twine(Prefix(), "vectorize.").str(),

174 Twine(Prefix(), "interleave.").str()},

175 {IsVectorizedMD});

177

178

179 IsVectorized.Value = 1;

180}

181

183 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {

185 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");

187 return false;

188 }

189

191 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");

193 return false;

194 }

195

197 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");

198

199

200

201 ORE.emit([&]() {

203 "AllDisabled", L->getStartLoc(),

204 L->getHeader())

205 << "loop not vectorized: vectorization and interleaving are "

206 "explicitly disabled, or the loop has already been "

207 "vectorized";

208 });

209 return false;

210 }

211

212 return true;

213}

214

216 using namespace ore;

217

218 ORE.emit([&]() {

223 << "loop not vectorized: vectorization is explicitly disabled";

224

227 R << "loop not vectorized";

229 R << " (Force=" << NV("Force", true);

230 if (Width.Value != 0)

231 R << ", Vector Width=" << NV("VectorWidth", getWidth());

232 if (getInterleave() != 0)

233 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());

234 R << ")";

235 }

236 return R;

237 });

238}

239

248}

249

251

252

256 EC.getKnownMinValue() > 1);

257}

258

259void LoopVectorizeHints::getHintsFromMetadata() {

261 if (!LoopID)

262 return;

263

264

266 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

267

271

272

273

274 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {

275 if (!MD || MD->getNumOperands() == 0)

276 continue;

277 S = dyn_cast<MDString>(MD->getOperand(0));

278 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)

279 Args.push_back(MD->getOperand(Idx));

280 } else {

281 S = dyn_cast<MDString>(MDO);

282 assert(Args.size() == 0 && "too many arguments for MDString");

283 }

284

285 if (!S)

286 continue;

287

288

290 if (Args.size() == 1)

291 setHint(Name, Args[0]);

292 }

293}
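
For orientation, a minimal sketch (not from this file) of the llvm.loop metadata that getHintsFromMetadata() walks and setHint() consumes; the hint names are the standard "llvm.loop.vectorize.*" strings and the concrete values are purely illustrative:

// br i1 %exitcond, label %exit, label %body, !llvm.loop !0
// !0 = distinct !{!0, !1, !2}
// !1 = !{!"llvm.loop.vectorize.width", i32 4}
// !2 = !{!"llvm.loop.vectorize.enable", i1 true}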

294

296 if (!Name.starts_with(Prefix()))

297 return;

299

300 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);

301 if (!C)

302 return;

303 unsigned Val = C->getZExtValue();

304

305 Hint *Hints[] = {&Width, &Interleave, &Force,

306 &IsVectorized, &Predicate, &Scalable};

307 for (auto *H : Hints) {

308 if (Name == H->Name) {

309 if (H->validate(Val))

310 H->Value = Val;

311 else

312 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");

313 break;

314 }

315 }

316}

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

343

344

345 if (Lp == OuterLp)

346 return true;

347 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

348

349

351 if (!IV) {

352 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");

353 return false;

354 }

355

356

358 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());

359 if (!LatchBr || LatchBr->isUnconditional()) {

360 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");

361 return false;

362 }

363

364

365 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());

366 if (!LatchCmp) {

368 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");

369 return false;

370 }

371

372 Value *CondOp0 = LatchCmp->getOperand(0);

373 Value *CondOp1 = LatchCmp->getOperand(1);

374 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);

375 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&

376 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {

377 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");

378 return false;

379 }

380

381 return true;

382}
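
As a rough illustration (hypothetical source, not from this file), an outer loop passes this latch check when the exit compare is between the updated canonical induction variable and a bound that is invariant in the outer loop:

// for (int i = 0; i < N; ++i) {       // latch: icmp of i.next against outer-invariant N
//   for (int j = 0; j < M; ++j) ...   // inner loops are checked by isUniformLoopNest()
// }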

383

384

385

388 return false;

389

390

391 for (Loop *SubLp : *Lp)

392 if (!isUniformLoop(SubLp, OuterLp))

393 return false;

394

395 return true;

396}

397

400 return DL.getIntPtrType(Ty);

401

402

403

406

407 return Ty;

408}

409

414 return Ty0;

415 return Ty1;

416}

417

418

419

422

423

424 if (!AllowedExit.count(Inst))

425

428

429 if (!TheLoop->contains(UI)) {

430 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');

431 return true;

432 }

433 }

434 return false;

435}

436

437

440

441 if (A == B)

442 return true;

443

444

445 Value *APtr = A->getPointerOperand();

446 Value *BPtr = B->getPointerOperand();

447 if (APtr == BPtr)

448 return true;

449

450

452}

453

456

457

458

459

460 const auto &Strides =

462

465 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,

466 CanAddPredicate, false).value_or(0);

467 if (Stride == 1 || Stride == -1)

468 return Stride;

469 return 0;

470}

471

474}

475

476namespace {

477

478

479

480

481

482

483class SCEVAddRecForUniformityRewriter

484 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {

485

486 unsigned StepMultiplier;

487

488

489 unsigned Offset;

490

491

492 Loop *TheLoop;

493

494

495 bool CannotAnalyze = false;

496

497 bool canAnalyze() const { return !CannotAnalyze; }

498

499public:

500 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,

501 unsigned Offset, Loop *TheLoop)

502 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),

503 TheLoop(TheLoop) {}

504

507 "addrec outside of TheLoop must be invariant and should have been "

508 "handled earlier");

509

510

513 if (!SE.isLoopInvariant(Step, TheLoop)) {

514 CannotAnalyze = true;

515 return Expr;

516 }

517 const SCEV *NewStep =

518 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));

519 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));

520 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);

521 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);

522 }

523

525 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))

526 return S;

528 }

529

531 if (SE.isLoopInvariant(S, TheLoop))

532 return S;

533

534 CannotAnalyze = true;

535 return S;

536 }

537

539

540 CannotAnalyze = true;

541 return S;

542 }

543

545 unsigned StepMultiplier, unsigned Offset,

546 Loop *TheLoop) {

547

548

549

550

552 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))

554

555 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,

556 TheLoop);

558

562 }

563};

564

565}

566

569 return true;

571 return false;

573 return true;

574

575

576

577 auto *SE = PSE.getSE();

579 return false;

581

582

583

585 const SCEV *FirstLaneExpr =

586 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);

587 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))

588 return false;

589

590

591

592

593 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {

594 const SCEV *IthLaneExpr =

595 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);

596 return FirstLaneExpr == IthLaneExpr;

597 });

598}
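
To make the lane rewrite concrete: for a fixed VF and lane Offset, an addrec {Start,+,Step} in TheLoop becomes {Start + Offset*Step,+,VF*Step}. A small worked instance with illustrative numbers:

// S = {0,+,2}, FixedVF = 4
//   lane 0 -> {0,+,8}   (Start + 0*2, Step * 4)
//   lane 3 -> {6,+,8}   (Start + 3*2, Step * 4)
// V is uniform for this VF only if every lane's rewritten expression folds to
// the same SCEV as lane 0 (e.g. after being fed through a UDiv).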

599

603 if (!Ptr)

604 return false;

605

606

607

608

610}

611

612bool LoopVectorizationLegality::canVectorizeOuterLoop() {

613 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");

614

615

616 bool Result = true;

618

620

621

622 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());

623 if (!Br) {

625 "loop control flow is not understood by vectorizer",

626 "CFGNotUnderstood", ORE, TheLoop);

627 if (DoExtraAnalysis)

628 Result = false;

629 else

630 return false;

631 }

632

633

634

635

636

637

638

639 if (Br && Br->isConditional() &&

644 "loop control flow is not understood by vectorizer",

645 "CFGNotUnderstood", ORE, TheLoop);

646 if (DoExtraAnalysis)

647 Result = false;

648 else

649 return false;

650 }

651 }

652

653

654

656 TheLoop /*context outer loop*/)) {

658 "loop control flow is not understood by vectorizer",

659 "CFGNotUnderstood", ORE, TheLoop);

660 if (DoExtraAnalysis)

661 Result = false;

662 else

663 return false;

664 }

665

666

667 if (!setupOuterLoopInductions()) {

669 "UnsupportedPhi", ORE, TheLoop);

670 if (DoExtraAnalysis)

671 Result = false;

672 else

673 return false;

674 }

676 return Result;

677}

678

679void LoopVectorizationLegality::addInductionPhi(

682 Inductions[Phi] = ID;

683

684

685

686

687

689 if (!Casts.empty())

690 InductionCastsToIgnore.insert(*Casts.begin());

691

692 Type *PhiTy = Phi->getType();

694

695

697 if (!WidestIndTy)

698 WidestIndTy = convertPointerToIntegerType(DL, PhiTy);

699 else

700 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);

701 }

702

703

705 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&

706 isa<Constant>(ID.getStartValue()) &&

707 cast<Constant>(ID.getStartValue())->isNullValue()) {

708

709

710

711

712

713 if (!PrimaryInduction || PhiTy == WidestIndTy)

714 PrimaryInduction = Phi;

715 }

716

717

718

719

720

721

722

724 AllowedExit.insert(Phi);

726 }

727

728 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");

729}

730

731bool LoopVectorizationLegality::setupOuterLoopInductions() {

733

734

735 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {

739 addInductionPhi(&Phi, ID, AllowedExit);

740 return true;

741 }

742

743

745 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");

746 return false;

747 };

748

749 return llvm::all_of(Header->phis(), IsSupportedPhi);

750}

751

752

753

754

755

756

757

758

759

760

764

765

766 if (Scalarize) {

767 ElementCount WidestFixedVF, WidestScalableVF;

768 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);

775 assert((WidestScalableVF.isZero() || !Scalarize) &&

776 "Caller may decide to scalarize a variant using a scalable VF");

777 }

778 return Scalarize;

779}

780

781

782

784 auto *StructTy = dyn_cast<StructType>(Ty);

785

786

787

788 if (StructTy && !StructTy->containsHomogeneousTypes())

789 return false;

791}

792

793bool LoopVectorizationLegality::canVectorizeInstrs() {

795

796

798

800 if (auto *Phi = dyn_cast<PHINode>(&I)) {

801 Type *PhiTy = Phi->getType();

802

806 "loop control flow is not understood by vectorizer",

807 "CFGNotUnderstood", ORE, TheLoop);

808 return false;

809 }

810

811

812

813

814 if (BB != Header) {

815

816

817

818

819

821 continue;

822 }

823

824

825 if (Phi->getNumIncomingValues() != 2) {

827 "loop control flow is not understood by vectorizer",

828 "CFGNotUnderstood", ORE, TheLoop, Phi);

829 return false;

830 }

831

834 DT, PSE.getSE())) {

837 Reductions[Phi] = RedDes;

838 continue;

839 }

840

841

842

843

844

845 auto IsDisallowedStridedPointerInduction =

848 return false;

850 ID.getConstIntStepValue() == nullptr;

851 };

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

869 !IsDisallowedStridedPointerInduction(ID)) {

870 addInductionPhi(Phi, ID, AllowedExit);

872 continue;

873 }

874

876 AllowedExit.insert(Phi);

877 FixedOrderRecurrences.insert(Phi);

878 continue;

879 }

880

881

882

884 !IsDisallowedStridedPointerInduction(ID)) {

885 addInductionPhi(Phi, ID, AllowedExit);

886 continue;

887 }

888

890 "value that could not be identified as "

891 "reduction is used outside the loop",

892 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);

893 return false;

894 }

895

896

897

898

899

900 auto *CI = dyn_cast<CallInst>(&I);

901

903 !isa<DbgInfoIntrinsic>(CI) &&

904 !(CI->getCalledFunction() && TLI &&

907

908

910 bool IsMathLibCall =

911 TLI && CI->getCalledFunction() &&

912 CI->getType()->isFloatingPointTy() &&

913 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&

915

916 if (IsMathLibCall) {

917

918

919

920

922 "Found a non-intrinsic callsite",

923 "library call cannot be vectorized. "

924 "Try compiling with -fno-math-errno, -ffast-math, "

925 "or similar flags",

926 "CantVectorizeLibcall", ORE, TheLoop, CI);

927 } else {

929 "call instruction cannot be vectorized",

930 "CantVectorizeLibcall", ORE, TheLoop, CI);

931 }

932 return false;

933 }

934

935

936

937 if (CI) {

938 auto *SE = PSE.getSE();

940 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)

943 TheLoop)) {

945 "intrinsic instruction cannot be vectorized",

946 "CantVectorizeIntrinsic", ORE, TheLoop, CI);

947 return false;

948 }

949 }

950 }

951

952

953

955 VecCallVariantsFound = true;

956

957 auto CanWidenInstructionTy = [this](Instruction const &Inst) {

958 Type *InstTy = Inst.getType();

959 if (!isa<StructType>(InstTy))

961

962

963

964

966 all_of(Inst.users(), IsaPred<ExtractValueInst>)) {

967

968

969 StructVecCallFound = true;

970 return true;

971 }

972

973 return false;

974 };

975

976

977

978

979 if (!CanWidenInstructionTy(I) ||

980 (isa<CastInst>(I) &&

982 isa<ExtractElementInst>(I)) {

984 "instruction return type cannot be vectorized",

985 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);

986 return false;

987 }

988

989

990 if (auto *ST = dyn_cast<StoreInst>(&I)) {

991 Type *T = ST->getValueOperand()->getType();

994 "CantVectorizeStore", ORE, TheLoop, ST);

995 return false;

996 }

997

998

999

1000 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {

1001

1003 assert(VecTy && "did not find vectorized version of stored type");

1006 "nontemporal store instruction cannot be vectorized",

1007 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);

1008 return false;

1009 }

1010 }

1011

1012 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {

1013 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {

1014

1015

1017 assert(VecTy && "did not find vectorized version of load type");

1020 "nontemporal load instruction cannot be vectorized",

1021 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);

1022 return false;

1023 }

1024 }

1025

1026

1027

1028

1029

1030

1031 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&

1032 !I.isFast()) {

1033 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");

1034 Hints->setPotentiallyUnsafe();

1035 }

1036

1037

1038

1040

1041

1042

1043

1046 continue;

1047 }

1049 "ValueUsedOutsideLoop", ORE, TheLoop, &I);

1050 return false;

1051 }

1052 }

1053 }

1054

1055 if (!PrimaryInduction) {

1056 if (Inductions.empty()) {

1058 "loop induction variable could not be identified",

1059 "NoInductionVariable", ORE, TheLoop);

1060 return false;

1061 }

1062 if (!WidestIndTy) {

1064 "integer loop induction variable could not be identified",

1065 "NoIntegerInductionVariable", ORE, TheLoop);

1066 return false;

1067 }

1068 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");

1069 }

1070

1071

1072

1073

1074 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())

1075 PrimaryInduction = nullptr;

1076

1077 return true;

1078}

1079

1080

1081

1082

1083

1084

1085

1086

1087

1088

1089

1090

1091

1092

1096

1097

1101 return false;

1102

1103

1104

1105

1106

1107 Value *HIncVal = nullptr;

1110 return false;

1111

1112

1114 return false;

1115

1116

1118 if (!GEP)

1119 return false;

1120

1121

1122 Value *HIdx = nullptr;

1123 for (Value *Index : GEP->indices()) {

1124 if (HIdx)

1125 return false;

1126 if (!isa<ConstantInt>(Index))

1127 HIdx = Index;

1128 }

1129

1130 if (!HIdx)

1131 return false;

1132

1133

1134

1135

1136

1137

1138

1139

1142 return false;

1143

1144

1145 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));

1146 if (!AR || AR->getLoop() != TheLoop)

1147 return false;

1148

1149

1150

1154 return false;

1155

1156 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

1157

1158

1159 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);

1160 return true;

1161}
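
A hedged sketch of the source pattern this matcher targets (array names are illustrative, not from this file): a load through a data-dependent index, a binary update, and a store back to the same address:

// for (unsigned i = 0; i < N; ++i)
//   Buckets[Indices[i]] += 1;   // load Buckets[idx], add, store to the same GEP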

1162

1163bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {

1164

1165

1167 return false;

1168

1169

1173

1174

1175 if (!Deps)

1176 return false;

1177

1179

1180

1183 continue;

1184

1185

1186

1187

1189 return false;

1190

1191 IUDep = &Dep;

1192 }

1193 if (!IUDep)

1194 return false;

1195

1196

1197 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));

1199

1200 if (!LI || !SI)

1201 return false;

1202

1203 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");

1205}

1206

1207bool LoopVectorizationLegality::canVectorizeMemory() {

1208 LAI = &LAIs.getInfo(*TheLoop);

1210 if (LAR) {

1211 ORE->emit([&]() {

1213 "loop not vectorized: ", *LAR);

1214 });

1215 }

1216

1218 return canVectorizeIndirectUnsafeDependences();

1219

1222 "write to a loop invariant address could not "

1223 "be vectorized",

1224 "CantVectorizeStoreToLoopInvariantAddress", ORE,

1225 TheLoop);

1226 return false;

1227 }

1228

1229

1230

1231

1232

1234

1235

1238 continue;

1239

1242 "We don't allow storing to uniform addresses",

1243 "write of conditional recurring variant value to a loop "

1244 "invariant address could not be vectorized",

1245 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1246 return false;

1247 }

1248

1249

1250

1251

1252 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {

1255 "Invariant address is calculated inside the loop",

1256 "write to a loop invariant address could not "

1257 "be vectorized",

1258 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1259 return false;

1260 }

1261 }

1262 }

1263

1265

1266

1267

1268

1269

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1286 I->getValueOperand()->getType() ==

1287 SI->getValueOperand()->getType();

1288 });

1289 continue;

1290 }

1292 }

1293

1294 bool IsOK = UnhandledStores.empty();

1295

1296 if (!IsOK) {

1298 "We don't allow storing to uniform addresses",

1299 "write to a loop invariant address could not "

1300 "be vectorized",

1301 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1302 return false;

1303 }

1304 }

1305 }

1306

1308 return true;

1309}

1310

1312 bool EnableStrictReductions) {

1313

1314

1315 if (!Requirements->getExactFPInst() || Hints->allowReordering())

1316 return true;

1317

1318

1319

1320

1321 if (!EnableStrictReductions ||

1325 }))

1326 return false;

1327

1328

1329

1330

1334 }));

1335}

1336

1341 });

1342}

1343

1348 return false;

1349

1352 return V == InvariantAddress ||

1354 });

1355}

1356

1358 Value *In0 = const_cast<Value *>(V);

1359 PHINode *PN = dyn_cast_or_null<PHINode>(In0);

1360 if (!PN)

1361 return false;

1362

1363 return Inductions.count(PN);

1364}

1365

1369 return nullptr;

1373 return &ID;

1374 return nullptr;

1375}

1376

1380 return nullptr;

1383 return &ID;

1384 return nullptr;

1385}

1386

1388 const Value *V) const {

1389 auto *Inst = dyn_cast<Instruction>(V);

1390 return (Inst && InductionCastsToIgnore.count(Inst));

1391}

1392

1395}

1396

1398 const PHINode *Phi) const {

1399 return FixedOrderRecurrences.count(Phi);

1400}

1401

1403

1404

1405

1410 "Uncountable exiting block must be a direct predecessor of latch");

1411 return BB == Latch;

1412 }

1414}

1415

1416bool LoopVectorizationLegality::blockCanBePredicated(

1420

1421

1422 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {

1424 continue;

1425 }

1426

1427

1428

1429

1430 if (isa<NoAliasScopeDeclInst>(&I))

1431 continue;

1432

1433

1434

1435

1436

1437 if (CallInst *CI = dyn_cast<CallInst>(&I))

1439 MaskedOp.insert(CI);

1440 continue;

1441 }

1442

1443

1444 if (auto *LI = dyn_cast<LoadInst>(&I)) {

1446 MaskedOp.insert(LI);

1447 continue;

1448 }

1449

1450

1451

1452

1453

1454

1455 if (auto *SI = dyn_cast<StoreInst>(&I)) {

1456 MaskedOp.insert(SI);

1457 continue;

1458 }

1459

1460 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())

1461 return false;

1462 }

1463

1464 return true;

1465}

1466

1467bool LoopVectorizationLegality::canVectorizeWithIfConvert() {

1470 "IfConversionDisabled", ORE, TheLoop);

1471 return false;

1472 }

1473

1474 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

1475

1476

1477

1478

1479

1480

1482

1483

1489 continue;

1490 }

1491

1492

1493

1494

1495

1496

1500 LoadInst *LI = dyn_cast<LoadInst>(&I);

1501

1502

1503

1506 &Predicates))

1508 Predicates.clear();

1509 }

1510 }

1511

1512

1514

1515

1516 if (isa<SwitchInst>(BB->getTerminator())) {

1519 "LoopContainsUnsupportedSwitch", ORE,

1520 TheLoop, BB->getTerminator());

1521 return false;

1522 }

1523 } else if (!isa<BranchInst>(BB->getTerminator())) {

1525 "LoopContainsUnsupportedTerminator", ORE,

1526 TheLoop, BB->getTerminator());

1527 return false;

1528 }

1529

1530

1532 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {

1534 "Control flow cannot be substituted for a select", "NoCFGForSelect",

1535 ORE, TheLoop, BB->getTerminator());

1536 return false;

1537 }

1538 }

1539

1540

1541 return true;

1542}
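
For intuition, a minimal example (hypothetical C source) of the control flow that if-conversion flattens into a single predicated block; the conditional store is recorded in MaskedOp and later emitted as a masked store:

// for (unsigned i = 0; i < N; ++i)
//   if (Cond[i])
//     A[i] = B[i] + 1;   // block predicated on Cond[i]; the store is masked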

1543

1544

1545bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

1546 bool UseVPlanNativePath) {

1548 "VPlan-native path is not enabled.");

1549

1550

1551

1552

1553

1554

1555

1556

1559

1560

1561

1564 "loop control flow is not understood by vectorizer",

1565 "CFGNotUnderstood", ORE, TheLoop);

1566 if (DoExtraAnalysis)

1567 Result = false;

1568 else

1569 return false;

1570 }

1571

1572

1575 "loop control flow is not understood by vectorizer",

1576 "CFGNotUnderstood", ORE, TheLoop);

1577 if (DoExtraAnalysis)

1578 Result = false;

1579 else

1580 return false;

1581 }

1583 return Result;

1584}

1585

1586bool LoopVectorizationLegality::canVectorizeLoopNestCFG(

1587 Loop *Lp, bool UseVPlanNativePath) {

1588

1589

1592 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {

1593 if (DoExtraAnalysis)

1594 Result = false;

1595 else

1596 return false;

1597 }

1598

1599

1600

1601 for (Loop *SubLp : *Lp)

1602 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {

1603 if (DoExtraAnalysis)

1604 Result = false;

1605 else

1606 return false;

1607 }

1609 return Result;

1610}

1611

1612bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {

1614 if (!LatchBB) {

1616 "Cannot vectorize early exit loop",

1617 "NoLatchEarlyExit", ORE, TheLoop);

1618 return false;

1619 }

1620

1621 if (Reductions.size() || FixedOrderRecurrences.size()) {

1623 "Found reductions or recurrences in early-exit loop",

1624 "Cannot vectorize early exit loop with reductions or recurrences",

1625 "RecurrencesInEarlyExitLoop", ORE, TheLoop);

1626 return false;

1627 }

1628

1631

1632

1634 for (BasicBlock *BB : ExitingBlocks) {

1637 if (isa<SCEVCouldNotCompute>(EC)) {

1638 UncountableExitingBlocks.push_back(BB);

1639

1641 if (Succs.size() != 2) {

1643 "Early exiting block does not have exactly two successors",

1644 "Incorrect number of successors from early exiting block",

1645 "EarlyExitTooManySuccessors", ORE, TheLoop);

1646 return false;

1647 }

1648

1650 if (!TheLoop->contains(Succs[0]))

1651 ExitBlock = Succs[0];

1652 else {

1654 ExitBlock = Succs[1];

1655 }

1656 UncountableExitBlocks.push_back(ExitBlock);

1657 } else

1658 CountableExitingBlocks.push_back(BB);

1659 }

1660

1661

1662

1663

1664 Predicates.clear();

1665

1666

1669 "Loop has too many uncountable exits",

1670 "Cannot vectorize early exit loop with more than one early exit",

1671 "TooManyUncountableEarlyExits", ORE, TheLoop);

1672 return false;

1673 }

1674

1675

1676

1680 "Cannot vectorize early exit loop",

1681 "EarlyExitNotLatchPredecessor", ORE, TheLoop);

1682 return false;

1683 }

1684

1685

1686 if (isa<SCEVCouldNotCompute>(

1689 "Cannot determine exact exit count for latch block",

1690 "Cannot vectorize early exit loop",

1691 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);

1692 return false;

1693 }

1695 "Latch block not found in list of countable exits!");

1696

1697

1698

1699 auto IsSafeOperation = [](Instruction *I) -> bool {

1700 switch (I->getOpcode()) {

1701 case Instruction::Load:

1702 case Instruction::Store:

1703 case Instruction::PHI:

1704 case Instruction::Br:

1705

1706 return true;

1707 default:

1708 return isSafeToSpeculativelyExecute(I);

1709 }

1710 };

1711

1712 for (auto *BB : TheLoop->blocks())

1713 for (auto &I : *BB) {

1714 if (I.mayWriteToMemory()) {

1715

1717 "Writes to memory unsupported in early exit loops",

1718 "Cannot vectorize early exit loop with writes to memory",

1719 "WritesInEarlyExitLoop", ORE, TheLoop);

1720 return false;

1721 } else if (!IsSafeOperation(&I)) {

1723 "cannot be speculatively executed",

1724 "UnsafeOperationsEarlyExitLoop", ORE,

1725 TheLoop);

1726 return false;

1727 }

1728 }

1729

1730

1732 "Expected latch predecessor to be the early exiting block");

1733

1734

1735 Predicates.clear();

1737 &Predicates)) {

1739 "Loop may fault",

1740 "Cannot vectorize potentially faulting early exit loop",

1741 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);

1742 return false;

1743 }

1744

1745 [[maybe_unused]] const SCEV *SymbolicMaxBTC =

1747

1748

1749 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&

1750 "Failed to get symbolic expression for backedge taken count");

1751 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "

1752 "backedge taken count: "

1753 << *SymbolicMaxBTC << '\n');

1754 return true;

1755}
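
As an illustration (hypothetical input, not from this file), the shape this supports is a read-only search loop with one data-dependent uncountable exit plus the countable latch exit; loads must be provably dereferenceable because lanes past the early exit may be executed speculatively:

// for (unsigned i = 0; i < N; ++i)   // latch exit: countable (i < N)
//   if (P[i] == Key)                 // early exit: trip count unknown to SCEV
//     break;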

1756

1758

1759

1760 bool Result = true;

1761

1763

1764

1765 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {

1766 if (DoExtraAnalysis) {

1767 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");

1768 Result = false;

1769 } else {

1770 return false;

1771 }

1772 }

1773

1774

1776 << '\n');

1777

1778

1779

1781 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

1782

1783 if (!canVectorizeOuterLoop()) {

1785 "UnsupportedOuterLoop", ORE, TheLoop);

1786

1787

1788 return false;

1789 }

1790

1791 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");

1792 return Result;

1793 }

1794

1796

1797 unsigned NumBlocks = TheLoop->getNumBlocks();

1798 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {

1799 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");

1800 if (DoExtraAnalysis)

1801 Result = false;

1802 else

1803 return false;

1804 }

1805

1806

1807 if (!canVectorizeInstrs()) {

1808 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");

1809 if (DoExtraAnalysis)

1810 Result = false;

1811 else

1812 return false;

1813 }

1814

1815 HasUncountableEarlyExit = false;

1819 "UnsupportedUncountableLoop", ORE, TheLoop);

1820 if (DoExtraAnalysis)

1821 Result = false;

1822 else

1823 return false;

1824 } else {

1825 HasUncountableEarlyExit = true;

1826 if (!isVectorizableEarlyExitLoop()) {

1827 UncountableExitingBlocks.clear();

1828 HasUncountableEarlyExit = false;

1829 if (DoExtraAnalysis)

1830 Result = false;

1831 else

1832 return false;

1833 }

1834 }

1835 }

1836

1837

1838 if (!canVectorizeMemory()) {

1839 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");

1840 if (DoExtraAnalysis)

1841 Result = false;

1842 else

1843 return false;

1844 }

1845

1846 if (Result) {

1847 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"

1849 ? " (with a runtime bound check)"

1850 : "")

1851 << "!\n");

1852 }

1853

1857

1859 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "

1860 "due to SCEVThreshold");

1862 "Too many SCEV assumptions need to be made and checked at runtime",

1863 "TooManySCEVRunTimeChecks", ORE, TheLoop);

1864 if (DoExtraAnalysis)

1865 Result = false;

1866 else

1867 return false;

1868 }

1869

1870

1871

1872

1873

1874 return Result;

1875}

1876

1878

1879 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");

1880

1882

1883 for (const auto &Reduction : getReductionVars())

1884 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

1885

1886

1887 for (auto *AE : AllowedExit) {

1888

1889

1890 if (ReductionLiveOuts.count(AE))

1891 continue;

1892 for (User *U : AE->users()) {

1895 continue;

1898 << "LV: Cannot fold tail by masking, loop has an outside user for "

1899 << *UI << "\n");

1900 return false;

1901 }

1902 }

1903

1905 PHINode *OrigPhi = Entry.first;

1906 for (User *U : OrigPhi->users()) {

1907 auto *UI = cast<Instruction>(U);

1908 if (!TheLoop->contains(UI)) {

1909 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "

1910 "outside user for "

1911 << *UI << "\n");

1912 return false;

1913 }

1914 }

1915 }

1916

1917

1919

1920

1921

1924 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {

1925 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");

1926 return false;

1927 }

1928 }

1929

1930 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");

1931

1932 return true;

1933}
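
For intuition, a sketch of what folding the tail by masking means for a loop with trip count n and a fixed VF of 4 (illustrative only): instead of peeling a scalar epilogue for the last n % 4 iterations, every vector iteration runs under a lane mask, and the loads/stores recorded in MaskedOp become masked operations:

// mask = (i + <0,1,2,3>) <u n    ; lanes past the trip count are disabled
// masked loads/stores execute only where mask is true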

1934

1936

1938

1939

1940

1942 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);

1943 assert(R && "Must be able to predicate block when tail-folding.");

1944 }

1945}

1946

1947}
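
A hedged usage sketch (assuming F, L, ORE, TTI and an already-constructed LoopVectorizationLegality object LVL are available; this shows roughly how the checks above are sequenced by the vectorizer and is not code from this file):

// LoopVectorizeHints Hints(L, /*InterleaveOnlyWhenForced=*/false, *ORE, TTI);
// if (!Hints.allowVectorization(F, L, /*VectorizeOnlyWhenForced=*/false))
//   return false;                            // disabled by pragma/metadata
// if (!LVL.canVectorize(/*UseVPlanNativePath=*/false))
//   return false;                            // CFG, instruction, or memory checks failed
// bool FoldTail = LVL.canFoldTailByMasking();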
