LLVM: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

32

33using namespace llvm;

35

36#define LV_NAME "loop-vectorize"

37#define DEBUG_TYPE LV_NAME

38

41 cl::desc("Enable if-conversion during vectorization."));

42

45 cl::desc("Enable recognition of non-constant strided "

46 "pointer induction variables."));

47

50 cl::desc("Allow enabling loop hints to reorder "

51 "FP operations during vectorization."));

52

53

54

57 cl::desc("The maximum number of SCEV checks allowed."));

58

61 cl::desc("The maximum number of SCEV checks allowed with a "

62 "vectorize(enable) pragma"));

63

68 cl::desc("Control whether the compiler can use scalable vectors to "

69 "vectorize a loop"),

72 "Scalable vectorization is disabled."),

75 "Scalable vectorization is available and favored when the "

76 "cost is inconclusive."),

79 "Scalable vectorization is available and favored when the "

80 "cost is inconclusive.")));

81

84 cl::desc("Enables autovectorization of some loops containing histograms"));

85

86

88

89namespace llvm {

90

91bool LoopVectorizeHints::Hint::validate(unsigned Val) {

92 switch (Kind) {

93 case HK_WIDTH:

95 case HK_INTERLEAVE:

97 case HK_FORCE:

98 return (Val <= 1);

99 case HK_ISVECTORIZED:

100 case HK_PREDICATE:

101 case HK_SCALABLE:

102 return (Val == 0 || Val == 1);

103 }

104 return false;

105}

106

108 bool InterleaveOnlyWhenForced,

112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),

113 Force("vectorize.enable", FK_Undefined, HK_FORCE),

114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),

115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),

116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),

117 TheLoop(L), ORE(ORE) {

118

119 getHintsFromMetadata();

120

121

124

125

126

127

128

129

130

135

136 if (Width.Value)

137

138

139

141 }

142

143

144

148

149

152

153 if (IsVectorized.Value != 1)

154

155

156

157 IsVectorized.Value =

160 << "LV: Interleaving disabled by the pass manager\n");

161}

162

164 LLVMContext &Context = TheLoop->getHeader()->getContext();

165

167 Context,

168 {MDString::get(Context, "llvm.loop.isvectorized"),

170 MDNode *LoopID = TheLoop->getLoopID();

173 {Twine(Prefix(), "vectorize.").str(),

174 Twine(Prefix(), "interleave.").str()},

175 {IsVectorizedMD});

176 TheLoop->setLoopID(NewLoopID);

177

178

179 IsVectorized.Value = 1;

180}

181

182void LoopVectorizeHints::reportDisallowedVectorization(

184 const StringRef RemarkMsg, const Loop *L) const {

185 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");

187 L->getHeader())

188 << "loop not vectorized: " << RemarkMsg);

189}

190

192 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {

195 reportDisallowedVectorization("#pragma vectorize disable",

196 "MissedExplicitlyDisabled",

197 "vectorization is explicitly disabled", L);

199 reportDisallowedVectorization("loop hasDisableAllTransformsHint",

200 "MissedTransformsDisabled",

201 "loop transformations are disabled", L);

202 } else {

203 llvm_unreachable("loop vect disabled for an unknown reason");

204 }

205 return false;

206 }

207

209 reportDisallowedVectorization(

210 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",

211 "MissedForceOnly", "only vectorizing loops that explicitly request it",

212 L);

213 return false;

214 }

215

217 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");

218

219

220

221 ORE.emit([&]() {

223 "AllDisabled", L->getStartLoc(),

224 L->getHeader())

225 << "loop not vectorized: vectorization and interleaving are "

226 "explicitly disabled, or the loop has already been "

227 "vectorized";

228 });

229 return false;

230 }

231

232 return true;

233}

234

236 using namespace ore;

237

238 ORE.emit([&]() {

241 TheLoop->getStartLoc(),

242 TheLoop->getHeader())

243 << "loop not vectorized: vectorization is explicitly disabled";

244

246 TheLoop->getHeader());

247 R << "loop not vectorized";

249 R << " (Force=" << NV("Force", true);

250 if (Width.Value != 0)

251 R << ", Vector Width=" << NV("VectorWidth", getWidth());

253 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());

254 R << ")";

255 }

256 return R;

257 });

258}

259

269

271

272

276 EC.getKnownMinValue() > 1);

277}

278

279void LoopVectorizeHints::getHintsFromMetadata() {

281 if (!LoopID)

282 return;

283

284

286 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

287

291

292

293

295 if (!MD || MD->getNumOperands() == 0)

296 continue;

298 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)

299 Args.push_back(MD->getOperand(Idx));

300 } else {

302 assert(Args.size() == 0 && "too many arguments for MDString");

303 }

304

305 if (!S)

306 continue;

307

308

310 if (Args.size() == 1)

311 setHint(Name, Args[0]);

312 }

313}

314

316 if (Name.consume_front(Prefix()))

317 return;

318

320 if (C)

321 return;

322 unsigned Val = C->getZExtValue();

323

324 Hint *Hints[] = {&Width, &Interleave, &Force,

325 &IsVectorized, &Predicate, &Scalable};

326 for (auto *H : Hints) {

327 if (Name == H->Name) {

328 if (H->validate(Val))

329 H->Value = Val;

330 else

331 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");

332 break;

333 }

334 }

335}

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

362

363

364 if (Lp == OuterLp)

365 return true;

366 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

367

368

370 if (IV) {

371 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");

372 return false;

373 }

374

375

378 if (!LatchBr || LatchBr->isUnconditional()) {

379 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");

380 return false;

381 }

382

383

385 if (!LatchCmp) {

387 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");

388 return false;

389 }

390

391 Value *CondOp0 = LatchCmp->getOperand(0);

392 Value *CondOp1 = LatchCmp->getOperand(1);

393 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);

394 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&

395 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {

396 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");

397 return false;

398 }

399

400 return true;

401}

402

403

404

407 return false;

408

409

410 for (Loop *SubLp : *Lp)

412 return false;

413

414 return true;

415}

416

418 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");

419

420 if (Ty->isPointerTy())

421 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());

422

423

424

425 if (Ty->getScalarSizeInBits() < 32)

427

429}

430

437

438

439

442

443

444 if (!AllowedExit.count(Inst))

445

448

449 if (!TheLoop->contains(UI)) {

450 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');

451 return true;

452 }

453 }

454 return false;

455}

456

457

460

461 if (A == B)

462 return true;

463

464

465 Value *APtr = A->getPointerOperand();

466 Value *BPtr = B->getPointerOperand();

467 if (APtr == BPtr)

468 return true;

469

470

472}

473

475 Value *Ptr) const {

476

477

478

479

480 const auto &Strides =

482

483 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,

484 AllowRuntimeSCEVChecks, false)

485 .value_or(0);

486 if (Stride == 1 || Stride == -1)

487 return Stride;

488 return 0;

489}

490

492 return LAI->isInvariant(V);

493}

494

495namespace {

496

497

498

499

500

501

502class SCEVAddRecForUniformityRewriter

504

505 unsigned StepMultiplier;

506

507

509

510

511 Loop *TheLoop;

512

513

514 bool CannotAnalyze = false;

515

516 bool canAnalyze() const { return !CannotAnalyze; }

517

518public:

519 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,

522 TheLoop(TheLoop) {}

523

524 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {

526 "addrec outside of TheLoop must be invariant and should have been "

527 "handled earlier");

528

529

532 if (!SE.isLoopInvariant(Step, TheLoop)) {

533 CannotAnalyze = true;

534 return Expr;

535 }

536 const SCEV *NewStep =

537 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));

538 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));

539 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);

540 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);

541 }

542

543 const SCEV *visit(const SCEV *S) {

544 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))

545 return S;

547 }

548

549 const SCEV *visitUnknown(const SCEVUnknown *S) {

550 if (SE.isLoopInvariant(S, TheLoop))

551 return S;

552

553 CannotAnalyze = true;

554 return S;

555 }

556

557 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {

558

559 CannotAnalyze = true;

560 return S;

561 }

562

563 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,

564 unsigned StepMultiplier, unsigned Offset,

565 Loop *TheLoop) {

566

567

568

569

573

574 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,

575 TheLoop);

577

581 }

582};

583

584}

585

588 return true;

590 return false;

592 return true;

593

594

595

596 auto *SE = PSE.getSE();

598 return false;

600

601

602

604 const SCEV *FirstLaneExpr =

605 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);

607 return false;

608

609

610

611

613 const SCEV *IthLaneExpr =

614 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);

615 return FirstLaneExpr == IthLaneExpr;

616 });

617}

618

622 if (!Ptr)

623 return false;

624

625

626

627

629}

630

631bool LoopVectorizationLegality::canVectorizeOuterLoop() {

632 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");

633

634

635 bool Result = true;

637

639

640

642 if (!Br) {

644 "loop control flow is not understood by vectorizer",

645 "CFGNotUnderstood", ORE, TheLoop);

646 if (DoExtraAnalysis)

647 Result = false;

648 else

649 return false;

650 }

651

652

653

654

655

656

657

658 if (Br && Br->isConditional() &&

663 "loop control flow is not understood by vectorizer",

664 "CFGNotUnderstood", ORE, TheLoop);

665 if (DoExtraAnalysis)

666 Result = false;

667 else

668 return false;

669 }

670 }

671

672

673

675 TheLoop )) {

677 "loop control flow is not understood by vectorizer",

678 "CFGNotUnderstood", ORE, TheLoop);

679 if (DoExtraAnalysis)

681 else

682 return false;

683 }

684

685

686 if (!setupOuterLoopInductions()) {

688 "UnsupportedPhi", ORE, TheLoop);

689 if (DoExtraAnalysis)

691 else

692 return false;

693 }

694

696}

697

698void LoopVectorizationLegality::addInductionPhi(

701 Inductions[Phi] = ID;

702

703

704

705

706

708 if (!Casts.empty())

709 InductionCastsToIgnore.insert(*Casts.begin());

710

711 Type *PhiTy = Phi->getType();

712 const DataLayout &DL = Phi->getDataLayout();

713

715 "Expected int, ptr, or FP induction phi type");

716

717

719 if (!WidestIndTy)

721 else

723 }

724

725

727 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&

730

731

732

733

734

735 if (!PrimaryInduction || PhiTy == WidestIndTy)

736 PrimaryInduction = Phi;

737 }

738

739

740

741

742

743

744

745 if (PSE.getPredicate().isAlwaysTrue()) {

746 AllowedExit.insert(Phi);

747 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));

748 }

749

750 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");

751}

752

753bool LoopVectorizationLegality::setupOuterLoopInductions() {

754 BasicBlock *Header = TheLoop->getHeader();

755

756

757 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {

758 InductionDescriptor ID;

761 addInductionPhi(&Phi, ID, AllowedExit);

762 return true;

763 }

764

765

767 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");

768 return false;

769 };

770

771 return llvm::all_of(Header->phis(), IsSupportedPhi);

772}

773

774

775

776

777

778

779

780

781

782

786

787

788 if (Scalarize) {

789 ElementCount WidestFixedVF, WidestScalableVF;

790 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);

797 assert((WidestScalableVF.isZero() || !Scalarize) &&

798 "Caller may decide to scalarize a variant using a scalable VF");

799 }

800 return Scalarize;

801}

802

803

804

807

808

809

810 if (StructTy && !StructTy->containsHomogeneousTypes())

811 return false;

813}

814

815bool LoopVectorizationLegality::canVectorizeInstrs() {

817 bool Result = true;

818

819

821

823 Result &= canVectorizeInstr(I);

824 if (!DoExtraAnalysis && !Result)

825 return false;

826 }

827 }

828

829 if (!PrimaryInduction) {

830 if (Inductions.empty()) {

832 "Did not find one integer induction var",

833 "loop induction variable could not be identified",

834 "NoInductionVariable", ORE, TheLoop);

835 return false;

836 }

837 if (!WidestIndTy) {

839 "Did not find one integer induction var",

840 "integer loop induction variable could not be identified",

841 "NoIntegerInductionVariable", ORE, TheLoop);

842 return false;

843 }

844 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");

845 }

846

847

848

849

850 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())

851 PrimaryInduction = nullptr;

852

854}

855

856bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {

858 BasicBlock *Header = TheLoop->getHeader();

859

861 Type *PhiTy = Phi->getType();

862

866 "Found a non-int non-pointer PHI",

867 "loop control flow is not understood by vectorizer",

868 "CFGNotUnderstood", ORE, TheLoop);

869 return false;

870 }

871

872

873

874

875 if (BB != Header) {

876

877

878

879

880

881 AllowedExit.insert(&I);

882 return true;

883 }

884

885

886 if (Phi->getNumIncomingValues() != 2) {

888 "Found an invalid PHI",

889 "loop control flow is not understood by vectorizer",

890 "CFGNotUnderstood", ORE, TheLoop, Phi);

891 return false;

892 }

893

894 RecurrenceDescriptor RedDes;

896 PSE.getSE())) {

899 Reductions[Phi] = RedDes;

903 "Only min/max recurrences are allowed to have multiple uses "

904 "currently");

905 return true;

906 }

907

908

909

910

911

912 auto IsDisallowedStridedPointerInduction =

913 [](const InductionDescriptor &ID) {

915 return false;

917 ID.getConstIntStepValue() == nullptr;

918 };

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934 InductionDescriptor ID;

936 !IsDisallowedStridedPointerInduction(ID)) {

937 addInductionPhi(Phi, ID, AllowedExit);

938 Requirements->addExactFPMathInst(ID.getExactFPMathInst());

939 return true;

940 }

941

943 AllowedExit.insert(Phi);

944 FixedOrderRecurrences.insert(Phi);

945 return true;

946 }

947

948

949

951 !IsDisallowedStridedPointerInduction(ID)) {

952 addInductionPhi(Phi, ID, AllowedExit);

953 return true;

954 }

955

957 "value that could not be identified as "

958 "reduction is used outside the loop",

959 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,

960 Phi);

961 return false;

962 }

963

964

965

966

968

970 !(CI->getCalledFunction() && TLI &&

972

973

974 LibFunc Func;

975 bool IsMathLibCall =

976 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&

977 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&

978 TLI->hasOptimizedCodeGen(Func);

979

980 if (IsMathLibCall) {

981

982

983

984

986 "Found a non-intrinsic callsite",

987 "library call cannot be vectorized. "

988 "Try compiling with -fno-math-errno, -ffast-math, "

989 "or similar flags",

990 "CantVectorizeLibcall", ORE, TheLoop, CI);

991 } else {

993 "call instruction cannot be vectorized",

994 "CantVectorizeLibcall", ORE, TheLoop, CI);

995 }

996 return false;

997 }

998

999

1000

1001 if (CI) {

1002 auto *SE = PSE.getSE();

1004 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)

1006 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {

1008 "Found unvectorizable intrinsic",

1009 "intrinsic instruction cannot be vectorized",

1010 "CantVectorizeIntrinsic", ORE, TheLoop, CI);

1011 return false;

1012 }

1013 }

1014 }

1015

1016

1017

1019 VecCallVariantsFound = true;

1020

1021 auto CanWidenInstructionTy = [](Instruction const &Inst) {

1022 Type *InstTy = Inst.getType();

1025

1026

1027

1028

1031 };

1032

1033

1034

1035

1036 if (!CanWidenInstructionTy(I) ||

1041 "instruction return type cannot be vectorized",

1042 "CantVectorizeInstructionReturnType", ORE,

1043 TheLoop, &I);

1044 return false;

1045 }

1046

1047

1049 Type *T = ST->getValueOperand()->getType();

1052 "CantVectorizeStore", ORE, TheLoop, ST);

1053 return false;

1054 }

1055

1056

1057

1058 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {

1059

1061 assert(VecTy && "did not find vectorized version of stored type");

1062 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {

1064 "nontemporal store instruction cannot be vectorized",

1065 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);

1066 return false;

1067 }

1068 }

1069

1071 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {

1072

1073

1075 assert(VecTy && "did not find vectorized version of load type");

1076 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {

1078 "nontemporal load instruction cannot be vectorized",

1079 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);

1080 return false;

1081 }

1082 }

1083

1084

1085

1086

1087

1088

1089 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&

1090 I.isFast()) {

1091 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");

1092 Hints->setPotentiallyUnsafe();

1093 }

1094

1095

1096

1098

1099

1100

1101

1102 if (PSE.getPredicate().isAlwaysTrue()) {

1103 AllowedExit.insert(&I);

1104 return true;

1105 }

1107 "ValueUsedOutsideLoop", ORE, TheLoop, &I);

1108 return false;

1109 }

1110

1111 return true;

1112}

1113

1114

1115

1116

1117

1118

1119

1120

1121

1122

1123

1124

1125

1126

1130

1131

1135 return false;

1136

1137

1138

1139

1140

1141 Value *HIncVal = nullptr;

1144 return false;

1145

1146

1148 return false;

1149

1150

1152 if (GEP)

1153 return false;

1154

1155

1156 Value *HIdx = nullptr;

1157 for (Value *Index : GEP->indices()) {

1158 if (HIdx)

1159 return false;

1161 HIdx = Index;

1162 }

1163

1164 if (!HIdx)

1165 return false;

1166

1167

1168

1169

1170

1171

1172

1173

1176 return false;

1177

1178

1180 if (!AR || AR->getLoop() != TheLoop)

1181 return false;

1182

1183

1184

1188 return false;

1189

1190 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

1191

1192

1193 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);

1194 return true;

1195}

1196

1197bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {

1198

1199

1201 return false;

1202

1203

1207

1208

1209 if (!Deps)

1210 return false;

1211

1213

1214

1217 continue;

1218

1219

1220

1221

1223 return false;

1224

1225 IUDep = &Dep;

1226 }

1227 if (!IUDep)

1228 return false;

1229

1230

1233

1234 if (!LI || !SI)

1235 return false;

1236

1237 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");

1238 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);

1239}

1240

1241bool LoopVectorizationLegality::canVectorizeMemory() {

1242 LAI = &LAIs.getInfo(*TheLoop);

1243 const OptimizationRemarkAnalysis *LAR = LAI->getReport();

1244 if (LAR) {

1245 ORE->emit([&]() {

1246 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),

1247 "loop not vectorized: ", *LAR);

1248 });

1249 }

1250

1251 if (!LAI->canVectorizeMemory()) {

1254 "Cannot vectorize unsafe dependencies in uncountable exit loop with "

1255 "side effects",

1256 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,

1257 TheLoop);

1258 return false;

1259 }

1260

1261 return canVectorizeIndirectUnsafeDependences();

1262 }

1263

1264 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {

1266 "write to a loop invariant address could not "

1267 "be vectorized",

1268 "CantVectorizeStoreToLoopInvariantAddress", ORE,

1269 TheLoop);

1270 return false;

1271 }

1272

1273

1274

1275

1276

1277 if (!LAI->getStoresToInvariantAddresses().empty()) {

1278

1279

1280 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {

1282 continue;

1283

1286 "We don't allow storing to uniform addresses",

1287 "write of conditional recurring variant value to a loop "

1288 "invariant address could not be vectorized",

1289 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1290 return false;

1291 }

1292

1293

1294

1295

1297 if (TheLoop->contains(Ptr)) {

1299 "Invariant address is calculated inside the loop",

1300 "write to a loop invariant address could not "

1301 "be vectorized",

1302 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1303 return false;

1304 }

1305 }

1306 }

1307

1308 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {

1309

1310

1311

1312

1313

1314 ScalarEvolution *SE = PSE.getSE();

1316 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {

1318

1319

1320

1321

1322

1323

1324

1325

1326

1327

1328 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {

1330 I->getValueOperand()->getType() ==

1331 SI->getValueOperand()->getType();

1332 });

1333 continue;

1334 }

1336 }

1337

1338 bool IsOK = UnhandledStores.empty();

1339

1340 if (!IsOK) {

1342 "We don't allow storing to uniform addresses",

1343 "write to a loop invariant address could not "

1344 "be vectorized",

1345 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);

1346 return false;

1347 }

1348 }

1349 }

1350

1351 PSE.addPredicate(LAI->getPSE().getPredicate());

1352 return true;

1353}

1354

1356 bool EnableStrictReductions) {

1357

1358

1359 if (!Requirements->getExactFPInst() || Hints->allowReordering())

1360 return true;

1361

1362

1363

1364

1365 if (!EnableStrictReductions ||

1369 }))

1370 return false;

1371

1372

1373

1374

1378 }));

1379}

1380

1387

1392 return false;

1393

1396 return V == InvariantAddress ||

1398 });

1399}

1400

1402 Value *In0 = const_cast<Value *>(V);

1404 if (!PN)

1405 return false;

1406

1407 return Inductions.count(PN);

1408}

1409

1413 return nullptr;

1417 return &ID;

1418 return nullptr;

1419}

1420

1424 return nullptr;

1427 return &ID;

1428 return nullptr;

1429}

1430

1432 const Value *V) const {

1434 return (Inst && InductionCastsToIgnore.count(Inst));

1435}

1436

1440

1442 const PHINode *Phi) const {

1443 return FixedOrderRecurrences.count(Phi);

1444}

1445

1448

1449

1450

1451 BasicBlock *Latch = TheLoop->getLoopLatch();

1455 "Uncountable exiting block must be a direct predecessor of latch");

1456 return BB == Latch;

1457 }

1459}

1460

1461bool LoopVectorizationLegality::blockCanBePredicated(

1465

1466

1469 continue;

1470 }

1471

1472

1473

1474

1476 continue;

1477

1478

1479

1480

1481

1484 MaskedOp.insert(CI);

1485 continue;

1486 }

1487

1488

1490 if (!SafePtrs.count(LI->getPointerOperand()))

1491 MaskedOp.insert(LI);

1492 continue;

1493 }

1494

1495

1496

1497

1498

1499

1501 MaskedOp.insert(SI);

1502 continue;

1503 }

1504

1505 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())

1506 return false;

1507 }

1508

1509 return true;

1510}

1511

1512bool LoopVectorizationLegality::canVectorizeWithIfConvert() {

1515 "IfConversionDisabled", ORE, TheLoop);

1516 return false;

1517 }

1518

1519 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

1520

1521

1522

1523

1524

1525

1526 SmallPtrSet<Value *, 8> SafePointers;

1527

1528

1529 for (BasicBlock *BB : TheLoop->blocks()) {

1531 for (Instruction &I : *BB)

1533 SafePointers.insert(Ptr);

1534 continue;

1535 }

1536

1537

1538

1539

1540

1541

1542 ScalarEvolution &SE = *PSE.getSE();

1544 for (Instruction &I : *BB) {

1546

1547

1548

1549

1550

1551

1552

1553

1554 auto CanSpeculatePointerOp = [this](Value *Ptr) {

1556 SmallPtrSet<Value *, 4> Visited;

1557 while (!Worklist.empty()) {

1559 if (!Visited.insert(CurrV).second)

1560 continue;

1561

1563 if (!CurrI || !TheLoop->contains(CurrI)) {

1564

1565

1567 TheLoop->getLoopPredecessor()

1568 ->getTerminator()

1569 ->getIterator(),

1570 DT))

1571 return false;

1572 continue;

1573 }

1574

1575

1577 return false;

1578

1579

1580

1582 return false;

1584 }

1585 return true;

1586 };

1587

1588

1589

1591 CanSpeculatePointerOp(LI->getPointerOperand()) &&

1593 &Predicates))

1594 SafePointers.insert(LI->getPointerOperand());

1595 Predicates.clear();

1596 }

1597 }

1598

1599

1600 for (BasicBlock *BB : TheLoop->blocks()) {

1601

1602

1604 if (TheLoop->isLoopExiting(BB)) {

1606 "LoopContainsUnsupportedSwitch", ORE,

1607 TheLoop, BB->getTerminator());

1608 return false;

1609 }

1612 "LoopContainsUnsupportedTerminator", ORE,

1613 TheLoop, BB->getTerminator());

1614 return false;

1615 }

1616

1617

1619 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {

1621 "Control flow cannot be substituted for a select", "NoCFGForSelect",

1622 ORE, TheLoop, BB->getTerminator());

1623 return false;

1624 }

1625 }

1626

1627

1628 return true;

1629}

1630

1631

1632bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,

1633 bool UseVPlanNativePath) {

1635 "VPlan-native path is not enabled.");

1636

1637

1638

1639

1640

1641

1642

1643

1645 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

1646

1647

1648

1651 "loop control flow is not understood by vectorizer",

1652 "CFGNotUnderstood", ORE, TheLoop);

1653 if (DoExtraAnalysis)

1655 else

1656 return false;

1657 }

1658

1659

1662 "loop control flow is not understood by vectorizer",

1663 "CFGNotUnderstood", ORE, TheLoop);

1664 if (DoExtraAnalysis)

1666 else

1667 return false;

1668 }

1669

1670

1674 "The loop latch terminator is not a BranchInst",

1675 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",

1676 ORE, TheLoop);

1677 if (DoExtraAnalysis)

1679 else

1680 return false;

1681 }

1682

1684}

1685

1686bool LoopVectorizationLegality::canVectorizeLoopNestCFG(

1687 Loop *Lp, bool UseVPlanNativePath) {

1688

1689

1691 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

1692 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {

1693 if (DoExtraAnalysis)

1695 else

1696 return false;

1697 }

1698

1699

1700

1701 for (Loop *SubLp : *Lp)

1702 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {

1703 if (DoExtraAnalysis)

1705 else

1706 return false;

1707 }

1708

1710}

1711

1712bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {

1713 BasicBlock *LatchBB = TheLoop->getLoopLatch();

1714 if (!LatchBB) {

1716 "Cannot vectorize early exit loop",

1717 "NoLatchEarlyExit", ORE, TheLoop);

1718 return false;

1719 }

1720

1721 if (Reductions.size() || FixedOrderRecurrences.size()) {

1723 "Found reductions or recurrences in early-exit loop",

1724 "Cannot vectorize early exit loop with reductions or recurrences",

1725 "RecurrencesInEarlyExitLoop", ORE, TheLoop);

1726 return false;

1727 }

1728

1729 SmallVector<BasicBlock *, 8> ExitingBlocks;

1730 TheLoop->getExitingBlocks(ExitingBlocks);

1731

1732

1734 BasicBlock *SingleUncountableExitingBlock = nullptr;

1735 for (BasicBlock *BB : ExitingBlocks) {

1736 const SCEV *EC =

1737 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);

1741 "Early exiting block does not have exactly two successors",

1742 "Incorrect number of successors from early exiting block",

1743 "EarlyExitTooManySuccessors", ORE, TheLoop);

1744 return false;

1745 }

1746

1747 if (SingleUncountableExitingBlock) {

1749 "Loop has too many uncountable exits",

1750 "Cannot vectorize early exit loop with more than one early exit",

1751 "TooManyUncountableEarlyExits", ORE, TheLoop);

1752 return false;

1753 }

1754

1755 SingleUncountableExitingBlock = BB;

1756 } else

1757 CountableExitingBlocks.push_back(BB);

1758 }

1759

1760

1761

1762

1763 Predicates.clear();

1764

1765 if (!SingleUncountableExitingBlock) {

1766 LLVM_DEBUG(dbgs() << "LV: Cound not find any uncountable exits");

1767 return false;

1768 }

1769

1770

1771

1773 if (LatchPredBB != SingleUncountableExitingBlock) {

1775 "Cannot vectorize early exit loop",

1776 "EarlyExitNotLatchPredecessor", ORE, TheLoop);

1777 return false;

1778 }

1779

1780

1782 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {

1784 "Cannot determine exact exit count for latch block",

1785 "Cannot vectorize early exit loop",

1786 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);

1787 return false;

1788 }

1790 "Latch block not found in list of countable exits!");

1791

1792

1793

1794 auto IsSafeOperation = [](Instruction *I) -> bool {

1795 switch (I->getOpcode()) {

1796 case Instruction::Load:

1797 case Instruction::Store:

1798 case Instruction::PHI:

1799 case Instruction::Br:

1800

1801 return true;

1802 default:

1804 }

1805 };

1806

1807 bool HasSideEffects = false;

1808 for (auto *BB : TheLoop->blocks())

1809 for (auto &I : *BB) {

1810 if (I.mayWriteToMemory()) {

1812 HasSideEffects = true;

1813 continue;

1814 }

1815

1816

1818 "Complex writes to memory unsupported in early exit loops",

1819 "Cannot vectorize early exit loop with complex writes to memory",

1820 "WritesInEarlyExitLoop", ORE, TheLoop);

1821 return false;

1822 }

1823

1824 if (!IsSafeOperation(&I)) {

1826 "cannot be speculatively executed",

1827 "UnsafeOperationsEarlyExitLoop", ORE,

1828 TheLoop);

1829 return false;

1830 }

1831 }

1832

1833

1835 "Expected latch predecessor to be the early exiting block");

1836

1838

1839 if (!HasSideEffects) {

1840

1841 Predicates.clear();

1842 if (isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,

1843 &Predicates)) {

1845 "Loop may fault", "Cannot vectorize non-read-only early exit loop",

1846 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);

1847 return false;

1848 }

1849 } else if (!canUncountableExitConditionLoadBeMoved(

1850 SingleUncountableExitingBlock))

1851 return false;

1852

1853

1854 for (LoadInst *LI : NonDerefLoads) {

1855

1856 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());

1857 if (Stride != 1) {

1859 "Loop contains potentially faulting strided load",

1860 "Cannot vectorize early exit loop with "

1861 "strided fault-only-first load",

1862 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);

1863 return false;

1864 }

1865 PotentiallyFaultingLoads.insert(LI);

1866 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI

1867 << "\n");

1868 }

1869

1870 [[maybe_unused]] const SCEV *SymbolicMaxBTC =

1871 PSE.getSymbolicMaxBackedgeTakenCount();

1872

1873

1875 "Failed to get symbolic expression for backedge taken count");

1876 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "

1877 "backedge taken count: "

1878 << *SymbolicMaxBTC << '\n');

1879 UncountableExitingBB = SingleUncountableExitingBlock;

1880 UncountableExitWithSideEffects = HasSideEffects;

1881 return true;

1882}

1883

// Legality helper for early-exit loops that have side effects: it is called
// from the `else if (!canUncountableExitConditionLoadBeMoved(...))` branch of
// the early-exit analysis. Returns true only when every check below passes;
// each failing check reports a vectorization failure and returns false.
// NOTE(review): this listing omits several source lines (the parameter list on
// line 1885, most of the pattern passed to match(), and the definitions of Br,
// R, AR, Load and SI), so the comments describe only what is visible here.
1884bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(

1886

1887

1888

1889

1890

1891

1892

1894

1895 using namespace llvm::PatternMatch;

// Pointer later compared against in-loop stores by the alias query below;
// presumably bound by the pattern match that follows - confirm.
1897 Value *Ptr = nullptr;

// Fail when the exit branch condition does NOT have the supported shape. The
// negation is required: the failure text says no supported condition load was
// found, and a successful match must fall through so the later checks run.
1899 if (!match(Br->getCondition(),

1903 "Early exit loop with store but no supported condition load",

1904 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);

1905 return false;

1906 }

1907

1908

// R (defined in an elided line) must be loop-invariant.
1909 if (!TheLoop->isLoopInvariant(R)) {

1911 "Early exit loop with store but no supported condition load",

1912 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);

1913 return false;

1914 }

1915

1916

1917

// AR (defined in an elided line) must exist, belong to this loop and be
// affine; per the failure text it describes the address of the load.
1919 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {

1921 "Uncountable exit condition depends on load with an address that is "

1922 "not an add recurrence in the loop",

1923 "EarlyExitLoadInvariantAddress", ORE, TheLoop);

1924 return false;

1925 }

1926

1927

// Tail of a check whose opening lines (1928-1930) are elided; when it fails
// the loop is rejected because it may fault.
1931 &Predicates)) {

1933 "Loop may fault",

1934 "Cannot vectorize potentially faulting early exit loop",

1935 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);

1936 return false;

1937 }

1938

// Safety info for the (elided) check below, which rejects a load that is not
// guaranteed to execute.
1939 ICFLoopSafetyInfo SafetyInfo;

1941

1942

1945 "Load for uncountable exit not guaranteed to execute",

1946 "ConditionalUncountableExitLoad", ORE, TheLoop);

1947 return false;

1948 }

1949

1950

1951

1952

// Scan every instruction in the loop except the condition load itself. Any
// instruction that may write to memory makes the loop illegal unless the
// (partly elided) alias check against Ptr reaches `continue`.
1953 for (auto *BB : TheLoop->blocks()) {

1954 for (auto &I : *BB) {

1955 if (&I == Load)

1956 continue;

1957

1958 if (I.mayWriteToMemory()) {

1960 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());

1962 continue;

1963 }

1964

1966 "Cannot determine whether critical uncountable exit load address "

1967 "does not alias with a memory write",

1968 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);

1969 return false;

1970 }

1971 }

1972 }

1973

1974 return true;

1975}

1976

// Body of LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath);
// the signature (source line 1977) is not present in this listing.
// Returns true if it is legal to vectorize this loop.
//
// The verdict is accumulated in Result. When the remark emitter allows extra
// analysis, a failing check only clears Result so the remaining checks still
// run; otherwise the function returns false at the first failure. Two checks
// return false unconditionally: the outer-loop check and the
// UncountableExitWithSideEffects check.
1978

1979

1980 bool Result = true;

1981

1982 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

1983

1984

// Check the loop-nest CFG first. The message ends with a newline so it does
// not run into the "Found a loop" message printed right after it.
1985 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {

1986 if (DoExtraAnalysis) {

1987 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest\n");

1988 Result = false;

1989 } else {

1990 return false;

1991 }

1992 }

1993

1994

1995 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()

1996 << '\n');

1997

1998

1999

// Loops that are not innermost are handled here and never reach the
// inner-loop checks below.
2000 if (!TheLoop->isInnermost()) {

2001 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

2002

2003 if (!canVectorizeOuterLoop()) {

2005 "UnsupportedOuterLoop", ORE, TheLoop);

2006

2007

// Unconditional: DoExtraAnalysis is not consulted on this path.
2008 return false;

2009 }

2010

2011 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");

2012 return Result;

2013 }

2014

2015 assert(TheLoop->isInnermost() && "Inner loop expected.");

2016

// A loop with more than one block must be if-convertible.
2017 unsigned NumBlocks = TheLoop->getNumBlocks();

2018 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {

2019 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");

2020 if (DoExtraAnalysis)

2021 Result = false;

2022 else

2023 return false;

2024 }

2025

2026

2027 if (!canVectorizeInstrs()) {

2028 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");

2029 if (DoExtraAnalysis)

2030 Result = false;

2031 else

2032 return false;

2033 }

2034

// NOTE(review): the condition guarding this section (source line 2035) is not
// visible in this listing. Inside it, a non-null getExitingBlock() is reported
// as an unsupported uncountable loop; otherwise the early-exit analysis runs.
2036 if (TheLoop->getExitingBlock()) {

2038 "UnsupportedUncountableLoop", ORE, TheLoop);

2039 if (DoExtraAnalysis)

2040 Result = false;

2041 else

2042 return false;

2043 } else {

2044 if (!isVectorizableEarlyExitLoop()) {

2047 "Must be false without vectorizable early-exit loop");

2048 if (DoExtraAnalysis)

2049 Result = false;

2050 else

2051 return false;

2052 }

2053 }

2054 }

2055

2056

2057 if (!canVectorizeMemory()) {

2058 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");

2059 if (DoExtraAnalysis)

2060 Result = false;

2061 else

2062 return false;

2063 }

2064

2065

// Early-exit loops with side effects are rejected here regardless of
// DoExtraAnalysis.
2066 if (UncountableExitWithSideEffects) {

2068 "Writes to memory unsupported in early exit loops",

2069 "Cannot vectorize early exit loop with writes to memory",

2070 "WritesInEarlyExitLoop", ORE, TheLoop);

2071 return false;

2072 }

2073

2074 if (Result) {

2075 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"

2076 << (LAI->getRuntimePointerChecking()->Need

2077 ? " (with a runtime bound check)"

2078 : "")

2079 << "!\n");

2080 }

2081

2085

// SCEVThreshold is defined in lines elided from this listing (2082-2084).
// Reject the loop when the collected SCEV predicate is more complex than that
// threshold. The debug text ends with a newline to match the other messages.
2086 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {

2087 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "

2088 "due to SCEVThreshold\n");

2090 "Too many SCEV assumptions need to be made and checked at runtime",

2091 "TooManySCEVRunTimeChecks", ORE, TheLoop);

2092 if (DoExtraAnalysis)

2093 Result = false;

2094 else

2095 return false;

2096 }

2097

2098

2099

2100

2101

2102 return Result;

2103}

2104

// Body of LoopVectorizationLegality::canFoldTailByMasking() const; the
// signature (source line 2105) is not present in this listing.
// Returns true if the loop can be vectorized while folding its tail by
// masking. Every rejection prints a debug message and returns false.
// NOTE(review): the declarations of ReductionLiveOuts, SafePointers and
// TmpMaskedOp, the two loop headers and the definition of UI are elided here.
2106

2107

2108

2109

// The loop's exiting block must be its latch.
2110 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {

2113 << "LV: Cannot fold tail by masking. Requires a single latch exit\n");

2114 return false;

2115 }

2116

2117 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");

2118

2120

// Record the loop-exit instruction of each reduction (loop header elided).
2122 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

2123

// Reject the loop if a user of this PHI lies outside the loop; per the debug
// text below these PHIs are the loop's induction variables.
2125 PHINode *OrigPhi = Entry.first;

2126 for (User *U : OrigPhi->users()) {

2128 if (!TheLoop->contains(UI)) {

2129 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "

2130 "outside user for "

2131 << *UI << "\n");

2132 return false;

2133 }

2134 }

2135 }

2136

2137

2139

2140

2141

// Every block of the loop must be predicable. The result set passed here is
// TmpMaskedOp rather than the MaskedOp used by prepareToFoldTailByMasking.
2143 for (BasicBlock *BB : TheLoop->blocks()) {

2144 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {

2145 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");

2146 return false;

2147 }

2148 }

2149

2150 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");

2151

2152 return true;

2153}

2154

// Body of LoopVectorizationLegality::prepareToFoldTailByMasking(); the
// signature (source line 2155) and the declaration of SafePointers are not
// present in this listing.
// Marks all respective loads/stores for masking by running
// blockCanBePredicated on every block of the loop with MaskedOp.
2156

2158

2159

2160

2161 for (BasicBlock *BB : TheLoop->blocks()) {

// The call sits outside the assert, so it still runs when asserts are
// compiled out; [[maybe_unused]] covers R being unused in that configuration.
2162 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);

2163 assert(R && "Must be able to predicate block when tail-folding.");

2164 }

2165}

2166

2167}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)

static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))

#define LV_NAME

Definition LoopVectorizationLegality.cpp:36

static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))

static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))

static const unsigned MaxInterleaveFactor

Maximum vectorization interleave count.

Definition LoopVectorizationLegality.cpp:87

static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))

static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))

static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))

static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))

This file defines the LoopVectorizationLegality class.

Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...

static bool isSimple(Instruction *I)

void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)

This pass exposes codegen information to IR-level passes.

Virtual Register Rewriter

static const uint32_t IV[8]

Class for arbitrary precision integers.

@ NoAlias

The two locations do not alias at all.

bool empty() const

empty - Check if the array is empty.

LLVM Basic Block Representation.

LLVM_ABI const BasicBlock * getUniquePredecessor() const

Return the predecessor of this block if it has a unique predecessor block.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

This class represents a function call, abstracting a target machine's calling convention.

A parsed version of the target data layout string, and methods for querying it.

static constexpr ElementCount getScalable(ScalarTy MinVal)

static constexpr ElementCount getFixed(ScalarTy MinVal)

constexpr bool isScalar() const

Exactly one element.

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override

Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...

void computeLoopSafetyInfo(const Loop *CurLoop) override

Computes safety information for a loop checks loop body & header for the possibility of may throw exc...

A struct for saving information about induction variables.

@ IK_FpInduction

Floating point induction variable.

@ IK_PtrInduction

Pointer induction var. Step = C.

@ IK_IntInduction

Integer induction variable. Step = C.

static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)

Returns true if Phi is an induction in the loop L.

Instruction * getExactFPMathInst()

Returns floating-point induction operator that does not allow reassociation (transforming the inducti...

Class to represent integer types.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

const MemoryDepChecker & getDepChecker() const

the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...

static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)

Return true if the block BB needs to be predicated in order for the loop to be vectorized.

bool contains(const LoopT *L) const

Return true if the specified loop is contained within this loop.

BlockT * getLoopLatch() const

If there is a single latch block for this loop, return it.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

unsigned getNumBackEdges() const

Calculate the number of back edges to the loop header.

iterator_range< block_iterator > blocks() const

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

bool isLoopHeader(const BlockT *BB) const

bool isInvariantStoreOfReduction(StoreInst *SI)

Returns True if given store is a final invariant store of one of the reductions found in the loop.

Definition LoopVectorizationLegality.cpp:1381

bool isInvariantAddressOfReduction(Value *V)

Returns True if given address is invariant and is used to store recurrent expression.

Definition LoopVectorizationLegality.cpp:1388

bool canVectorize(bool UseVPlanNativePath)

Returns true if it is legal to vectorize this loop.

Definition LoopVectorizationLegality.cpp:1977

bool blockNeedsPredication(const BasicBlock *BB) const

Return true if the block BB needs to be predicated in order for the loop to be vectorized.

Definition LoopVectorizationLegality.cpp:1446

int isConsecutivePtr(Type *AccessTy, Value *Ptr) const

Check if this pointer is consecutive when vectorizing.

Definition LoopVectorizationLegality.cpp:474

bool hasUncountableExitWithSideEffects() const

Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...

bool canVectorizeFPMath(bool EnableStrictReductions)

Returns true if it is legal to vectorize the FP math operations in this loop.

Definition LoopVectorizationLegality.cpp:1355

bool isFixedOrderRecurrence(const PHINode *Phi) const

Returns True if Phi is a fixed-order recurrence in this loop.

Definition LoopVectorizationLegality.cpp:1441

const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const

Returns a pointer to the induction descriptor, if Phi is pointer induction.

Definition LoopVectorizationLegality.cpp:1422

const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const

Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.

Definition LoopVectorizationLegality.cpp:1411

bool isInductionPhi(const Value *V) const

Returns True if V is a Phi node of an induction variable in this loop.

Definition LoopVectorizationLegality.cpp:1401

bool isUniform(Value *V, ElementCount VF) const

Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...

Definition LoopVectorizationLegality.cpp:586

const InductionList & getInductionVars() const

Returns the induction variables found in the loop.

bool isInvariant(Value *V) const

Returns true if V is invariant across all loop iterations according to SCEV.

Definition LoopVectorizationLegality.cpp:491

const ReductionList & getReductionVars() const

Returns the reduction variables found in the loop.

bool canFoldTailByMasking() const

Return true if we can vectorize this loop while folding its tail by masking.

Definition LoopVectorizationLegality.cpp:2105

void prepareToFoldTailByMasking()

Mark all respective loads/stores for masking.

Definition LoopVectorizationLegality.cpp:2155

bool hasUncountableEarlyExit() const

Returns true if the loop has exactly one uncountable early exit, i.e.

bool isUniformMemOp(Instruction &I, ElementCount VF) const

A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...

Definition LoopVectorizationLegality.cpp:619

BasicBlock * getUncountableEarlyExitingBlock() const

Returns the uncountable early exiting block, if there is exactly one.

bool isInductionVariable(const Value *V) const

Returns True if V can be considered as an induction variable in this loop.

Definition LoopVectorizationLegality.cpp:1437

bool isCastedInductionVariable(const Value *V) const

Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...

Definition LoopVectorizationLegality.cpp:1431

@ SK_PreferScalable

Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...

@ SK_Unspecified

Not selected.

@ SK_FixedWidthOnly

Disables vectorization with scalable vectors.

enum ForceKind getForce() const

bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const

Definition LoopVectorizationLegality.cpp:191

bool allowReordering() const

When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...

Definition LoopVectorizationLegality.cpp:270

void emitRemarkWithHints() const

Dumps all the hint information.

Definition LoopVectorizationLegality.cpp:235

ElementCount getWidth() const

@ FK_Enabled

Forcing enabled.

@ FK_Undefined

Not selected.

@ FK_Disabled

Forcing disabled.

void setAlreadyVectorized()

Mark the loop L as already vectorized by setting the width to 1.

Definition LoopVectorizationLegality.cpp:163

LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)

Definition LoopVectorizationLegality.cpp:107

const char * vectorizeAnalysisPassName() const

If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...

Definition LoopVectorizationLegality.cpp:260

unsigned getInterleave() const

unsigned getIsVectorized() const

Represents a single loop in the control flow graph.

bool isLoopInvariant(const Value *V) const

Return true if the specified value is loop invariant.

PHINode * getCanonicalInductionVariable() const

Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...

MDNode * getLoopID() const

Return the llvm.loop loop id metadata node for this loop if it is present.

const MDOperand & getOperand(unsigned I) const

ArrayRef< MDOperand > operands() const

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

unsigned getNumOperands() const

Return number of MDNode operands.

Tracking metadata reference owned by Metadata.

LLVM_ABI StringRef getString() const

static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)

iterator find(const KeyT &Key)

Checks memory dependences among accesses to the same underlying object to determine whether there vec...

const SmallVectorImpl< Dependence > * getDependences() const

Returns the memory dependences.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

The RecurrenceDescriptor is used to identify recurrences variables in a loop.

Instruction * getExactFPMathInst() const

Returns 1st non-reassociative FP instruction in the PHI node's use-chain.

static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)

Returns true if Phi is a fixed-order recurrence.

bool hasExactFPMath() const

Returns true if the recurrence has floating-point math that requires precise (ordered) operations.

Instruction * getLoopExitInstr() const

static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)

Returns true if Phi is a reduction in TheLoop.

bool hasUsesOutsideReductionChain() const

Returns true if the reduction PHI has any uses outside the reduction chain.

RecurKind getRecurrenceKind() const

bool isOrdered() const

Expose an ordered FP reduction to the instance users.

StoreInst * IntermediateStore

Reductions may store temporary or final result to an invariant address.

static bool isMinMaxRecurrenceKind(RecurKind Kind)

Returns true if the recurrence kind is any min/max kind.

const SCEV * getStart() const

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

const Loop * getLoop() const

This visitor recursively visits a SCEV expression and re-writes it.

const SCEV * visit(const SCEV *S)

This class represents an analyzed expression in the program.

The main scalar evolution driver.

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

LLVM_ABI bool isSCEVable(Type *Ty) const

Test if values of the given type are analyzable within the SCEV framework.

LLVM_ABI const SCEV * getCouldNotCompute()

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Value * getPointerOperand()

StringRef - Represent a constant reference to a string, i.e.

Provides information about what library functions are available for the current target.

void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const

Returns the largest vectorization factor used in the list of vector functions.

bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

LLVM_ABI std::string str() const

Return the twine contents as a std::string.

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isPointerTy() const

True if this is an instance of PointerType.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntOrPtrTy() const

Return true if this is an integer type or a pointer type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

Value * getOperand(unsigned i) const

static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)

static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)

Retrieve all the VFInfo instances associated to the CallInst CI.

LLVM Value Representation.

iterator_range< user_iterator > users()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

static LLVM_ABI bool isValidElementType(Type *ElemTy)

Return true if the specified type is valid as an element type.

static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

constexpr bool isZero() const

const ParentTy * getParent() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

OneUse_match< SubPat > m_OneUse(const SubPat &SP)

TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)

Matches StoreInst.

BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)

class_match< BinaryOperator > m_BinOp()

Match an arbitrary binary operation and ignore it.

bool match(Val *V, const Pattern &P)

bind_ty< Instruction > m_Instruction(Instruction *&I)

Match an instruction, capturing it if we match.

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))

match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)

OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)

Matches LoadInst.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)

Extract a Value from Metadata, if any.

Add a small namespace to avoid name clashes with the classes used in the streaming interface.

NodeAddr< PhiNode * > Phi

NodeAddr< FuncNode * > Func

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

auto successors(const MachineBasicBlock *BB)

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)

Definition LoopVectorizationLegality.cpp:405

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

static bool isUniformLoop(Loop *Lp, Loop *OuterLp)

Definition LoopVectorizationLegality.cpp:360

LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)

Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...

static bool canWidenCallReturnType(Type *Ty)

Returns true if the call return type Ty can be widened by the loop vectorizer.

Definition LoopVectorizationLegality.cpp:805

LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)

Return true if the instruction does not have any effects besides calculating the result and does not ...

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)

Definition LoopVectorizationLegality.cpp:431

static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)

Definition LoopVectorizationLegality.cpp:417

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)

Look for the loop attribute that disables all transformation heuristic.

static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition LoopVectorizationLegality.cpp:440

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)

Returns true if A and B have same pointer operands or same SCEVs addresses.

Definition LoopVectorizationLegality.cpp:458

bool canVectorizeTy(Type *Ty)

Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...

LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)

Identifies if the vector form of the intrinsic has a scalar operand.

LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)

Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...

ArrayRef(const T &OneElt) -> ArrayRef< T >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns true if the loop contains read-only memory accesses and doesn't throw.

LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)

Create a new LoopID after the loop has been transformed.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

auto predecessors(const MachineBasicBlock *BB)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

auto seq(T Begin, T End)

Iterate over an integral type from Begin up to - but not including - End.

static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)

Find histogram operations that match high-level code in loops:

Definition LoopVectorizationLegality.cpp:1127

LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Returns true if V cannot be poison, but may be undef.

static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)

Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...

Definition LoopVectorizationLegality.cpp:783

LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...

constexpr detail::IsaCheckPredicate< Types... > IsaPred

Function object wrapper for the llvm::isa type check.

LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

bool SCEVExprContains(const SCEV *Root, PredTy Pred)

Return true if any node in Root satisfies the predicate Pred.

Dependence between memory access instructions.

Instruction * getDestination(const MemoryDepChecker &DepChecker) const

Return the destination instruction of the dependence.

Instruction * getSource(const MemoryDepChecker &DepChecker) const

Return the source instruction of the dependence.

static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)

Dependence types that don't prevent vectorization.

TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.

Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.

static LLVM_ABI const unsigned MaxVectorWidth

Maximum SIMD width.

static LLVM_ABI bool isInterleaveForced()

True if force-vector-interleave was specified by the user.

static LLVM_ABI unsigned VectorizationInterleave

Interleave factor as overridden by the user.