LLVM: lib/Analysis/LoopAccessAnalysis.cpp Source File

1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence that was originally
10// developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13

61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>

68

69using namespace llvm;
70using namespace llvm::PatternMatch;
71

72#define DEBUG_TYPE "loop-accesses"

73

74static cl::opt<unsigned, true>
75    VectorizationFactor("force-vector-width", cl::Hidden,
76                        cl::desc("Sets the SIMD width. Zero is autoselect."),
77                        cl::location(VectorizerParams::VectorizationFactor));

79

80static cl::opt<unsigned, true>
81    VectorizationInterleave("force-vector-interleave", cl::Hidden,
82                            cl::desc("Sets the vectorization interleave count. "
83                                     "Zero is autoselect."),
84                            cl::location(VectorizerParams::VectorizationInterleave));

87

89 "runtime-memory-check-threshold", cl::Hidden,

90 cl::desc("When performing memory disambiguation checks at runtime do not "

91 "generate more than this number of comparisons (default = 8)."),

94

95/// The maximum iterations used to merge memory checks.
96static cl::opt<unsigned> MemoryCheckMergeThreshold(
97    "memory-check-merge-threshold", cl::Hidden,
98    cl::desc("Maximum number of comparisons done when trying to merge "
99             "runtime memory checks. (default = 100)"),
100    cl::init(100));

101

102

104

105/// We collect dependences up to this threshold.
106static cl::opt<unsigned>
107    MaxDependences("max-dependences", cl::Hidden,
108                   cl::desc("Maximum number of dependences collected by "
109                            "loop-access analysis (default = 100)"),
110                   cl::init(100));
111

112// This enables versioning on the strides of symbolically striding memory
113// accesses in code like the following.
114//   for (i = 0; i < N; ++i)
115//     A[i * Stride1] += B[i * Stride2] ...
116//
117// Will be roughly translated to
118//    if (Stride1 == 1 && Stride2 == 1) {
119//      for (i = 0; i < N; i+=4)
120//       A[i:i+3] += ...
121//    } else
122//      ...

123static cl::opt<bool> EnableMemAccessVersioning(
124    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125    cl::desc("Enable symbolic stride memory access versioning"));

126

127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.

130 "store-to-load-forwarding-conflict-detection", cl::Hidden,

131 cl::desc("Enable conflict detection in loop-access analysis"),

133

134static cl::opt<unsigned> MaxForkedSCEVDepth(
135    "max-forked-scev-depth", cl::Hidden,
136    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137    cl::init(5));
138

140 "laa-speculate-unit-stride", cl::Hidden,

141 cl::desc("Speculate that non-constant strides are unit in LAA"),

143

147 "Hoist inner loop runtime memory checks to outer loop if possible"),

150

151bool VectorizerParams::isInterleaveForced() {
152 return ::VectorizationInterleave.getNumOccurrences() > 0;

153}

154

155const SCEV *llvm::replaceSymbolicStrideSCEV(
156    PredicatedScalarEvolution &PSE,
157    const DenseMap<Value *, const SCEV *> &PtrToStride, Value *Ptr) {
158  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
159
160  // If there is an entry in the map return the SCEV of the pointer with the
161  // symbolic stride replaced by one.

162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);

163 if (!StrideSCEV)

164

165 return OrigSCEV;

166

167

168

169

170

172

177

178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV

179 << " by: " << *Expr << "\n");

180 return Expr;

181}
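// Illustrative sketch (not from the file; hypothetical SCEVs): with the map
// entry %Stride -> 1 for a pointer whose SCEV is {%A,+,(4 * %Stride)}<%loop>,
// the predicated rewrite above yields {%A,+,4}<%loop>, and the versioned
// loop is then guarded at runtime by the predicate %Stride == 1.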

182

185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),

190 Members.push_back(Index);

191}

192

193

194

201

202

203

210

211

212

213static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
214    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
215    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
216    AssumptionCache *AC,
217    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {

220 if (!StartPtr)

221 return false;

223 bool CheckForNonNull, CheckForFreed;

224 Value *StartPtrV = StartPtr->getValue();

226 DL, CheckForNonNull, CheckForFreed);

227

228 if (DerefBytes && (CheckForNonNull || CheckForFreed))

229 return false;

230

233 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);

234

235

236 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();

237 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {

239 CtxI = LoopPred->getTerminator();

240 }

245 return false;

248 return false;

249 DerefRK = std::max(DerefRK, RK);

250 return true;

251 });

252 if (DerefRK) {

253 DerefBytesSCEV =

255 }

256

257 if (DerefBytesSCEV->isZero())

258 return false;

259

262 return false;

263

266

267

269 return false;

272

273 if (!LoopGuards)

276

277 const SCEV *OffsetAtLastIter =

279 if (!OffsetAtLastIter) {

280

281

284 return false;

286 MaxBTC, WiderTy);

287 OffsetAtLastIter =

289 if (!OffsetAtLastIter)

290 return false;

291 }

292

295 if (!OffsetEndBytes)

296 return false;

297

298 if (IsKnownNonNegative) {

299

300

301

303 if (!EndBytes)

304 return false;

305

306 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);

308 }

309

310

311

312

316}
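// Sketch of the reasoning above (hypothetical numbers): if %N bytes starting
// at the AddRec's base are known dereferenceable, then evaluating
// {%base,+,4} at the maximum backedge-taken count MaxBTC stays in object
// bounds whenever 4 * (MaxBTC + 1) <= %N, so the computed end offset cannot
// wrap the index type.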

317

319 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,

321 DenseMap<std::pair<const SCEV *, Type *>,

322 std::pair<const SCEV *, const SCEV *>> *PointerBounds,

324 std::optionalScalarEvolution::LoopGuards &LoopGuards) {

325 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;

328 {{PtrExpr, AccessTy},

330 if (!Ins)

331 return Iter->second;

332 PtrBoundsPair = &Iter->second;

333 }

334

335 const SCEV *ScStart;

336 const SCEV *ScEnd;

337

339 Type *IdxTy = DL.getIndexType(PtrExpr->getType());

342 ScStart = ScEnd = PtrExpr;

344 ScStart = AR->getStart();

346

347

348

349

350 ScEnd = AR->evaluateAtIteration(BTC, *SE);

351 else {

352

353

354

355

356

357

358

360 DT, AC, LoopGuards)) {

361 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);

362 } else {

367 AR->getType())));

368 }

369 }

370 const SCEV *Step = AR->getStepRecurrence(*SE);

371

372

373

375 if (CStep->getValue()->isNegative())

377 } else {

378

379

380

381 ScStart = SE->getUMinExpr(ScStart, ScEnd);

382 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);

383 }

384 } else

386

389

390

391 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

392

393 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};

395 *PtrBoundsPair = Res;

396 return Res;

397}
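// Worked example (illustrative): for the access {%base,+,4}<%loop> with
// backedge-taken count n, the bounds computed above are
//   ScStart = %base
//   ScEnd   = %base + 4 * n + 4   // value at the last iteration + EltSize
// so the half-open range [ScStart, ScEnd) covers every byte the pointer can
// touch inside the loop.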

398

399

400

402 Type *AccessTy, bool WritePtr,

403 unsigned DepSetId, unsigned ASId,

405 bool NeedsFreeze) {

409 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),

410 &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);

413 "must be able to compute both start and end expressions");

414 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,

415 NeedsFreeze);

416}

417

418bool RuntimePointerChecking::tryToCreateDiffCheck(

420

421

422

424 return false;

425

428

429

430

433 return false;

434

439

440

441 if (AccSrc.size() != 1 || AccSink.size() != 1)

442 return false;

443

444

445 if (AccSink[0] < AccSrc[0])

447

449 const SCEV *SrcStart;

450 const SCEV *SinkStart;

452 if (match(Src->Expr,

455 match(Sink->Expr,

458 return false;

459

467 return false;

468

470 unsigned AllocSize =

471 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));

472

473

474

475

477 return false;

478

482

483

486

491 return false;

492

493

494

495

496

501 const Loop *StartARLoop = SrcStartAR->getLoop();

502 if (StartARLoop == SinkStartAR->getLoop() &&

504

505

506

507 SrcStartAR->getStepRecurrence(*SE) !=

508 SinkStartAR->getStepRecurrence(*SE)) {

509 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "

510 "cannot be hoisted out of the outer loop\n");

511 return false;

512 }

513 }

514

515 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"

516 << "SrcStart: " << *SrcStartInt << '\n'

517 << "SinkStartInt: " << *SinkStartInt << '\n');

518 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,

519 Src->NeedsFreeze || Sink->NeedsFreeze);

520 return true;

521}
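// A diff check trades two range-overlap comparisons for one subtraction:
// rather than testing [SrcStart, SrcEnd) against [SinkStart, SinkEnd), the
// expanded check (roughly; a sketch for same-stride accesses of AllocSize
// bytes) verifies that SinkStartInt - SrcStartInt is at least one vector
// iteration's worth of bytes, which is cheaper to emit and easier to hoist
// out of outer loops.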

522

524 SmallVector<RuntimePointerCheck, 4> Checks;

525

527 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {

530

532 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);

533 Checks.emplace_back(&CGI, &CGJ);

534 }

535 }

536 }

537 return Checks;

538}

539

542 assert(Checks.empty() && "Checks is not empty");

543 groupChecks(DepCands, UseDependencies);

545}

546

549 for (const auto &I : M.Members)

550 for (const auto &J : N.Members)

552 return true;

553 return false;

554}

555

556

557

561 if (!Diff)

562 return nullptr;

563 return Diff->isNegative() ? J : I;

564}

565

569 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,

570 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),

571 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);

572}

573

575 const SCEV *End, unsigned AS,

579 "all pointers in a checking group must be in the same address space");

580

581

582

583

585 if (!Min0)

586 return false;

587

589 if (!Min1)

590 return false;

591

592

593 if (Min0 == Start)

594 Low = Start;

595

596

597 if (Min1 != End)

599

600 Members.push_back(Index);

602 return true;

603}

604

605void RuntimePointerChecking::groupChecks(

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651 if (!UseDependencies) {

652 for (unsigned I = 0; I < Pointers.size(); ++I)

654 return;

655 }

656

657 unsigned TotalComparisons = 0;

658

660 for (unsigned Index = 0; Index < Pointers.size(); ++Index)

661 PositionMap[Pointers[Index].PointerValue].push_back(Index);

662

663

664

666

667

668

669

670 for (unsigned I = 0; I < Pointers.size(); ++I) {

671

672

674 continue;

675

678

680

681

682

683

684

685

687 auto PointerI = PositionMap.find(M.getPointer());

688

689

690 if (PointerI == PositionMap.end())

691 continue;

692 for (unsigned Pointer : PointerI->second) {

693 bool Merged = false;

694

695 Seen.insert(Pointer);

696

697

698

700

701

702

703

705 break;

706

707 TotalComparisons++;

708

709 if (Group.addPointer(Pointer, *this)) {

710 Merged = true;

711 break;

712 }

713 }

714

715 if (!Merged)

716

717

718

719 Groups.emplace_back(Pointer, *this);

720 }

721 }

722

723

724

726 }

727}

728

731 unsigned PtrIdx2) {

732 return (PtrToPartition[PtrIdx1] != -1 &&

733 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);

734}

735

739

740

742 return false;

743

744

746 return false;

747

748

750}

751

752
753static DenseMap<const RuntimeCheckingPtrGroup *, unsigned>
754getPtrToIdxMap(ArrayRef<RuntimeCheckingPtrGroup> CheckingGroups) {
755  DenseMap<const RuntimeCheckingPtrGroup *, unsigned> PtrIndices;
756 for (const auto &[Idx, CG] : enumerate(CheckingGroups))

757 PtrIndices[&CG] = Idx;

758 return PtrIndices;

759}

760

763 unsigned Depth) const {

764 unsigned N = 0;

766 for (const auto &[Check1, Check2] : Checks) {

767 const auto &First = Check1->Members, &Second = Check2->Members;

768 OS.indent(Depth) << "Check " << N++ << ":\n";

769 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)

770 << ":\n";

771 for (unsigned K : First)

773 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)

774 << ":\n";

775 for (unsigned K : Second)

777 }

778}

779

781

782 OS.indent(Depth) << "Run-time memory checks:\n";

784

785 OS.indent(Depth) << "Grouped accesses:\n";

788 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";

789 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High

790 << ")\n";

791 for (unsigned Member : CG.Members) {

793 }

794 }

795}

796

797namespace {

798

799

800

801

802

803class AccessAnalysis {

804public:

805

808

813 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),

814 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {

815

816 BAA.enableCrossIterationMode();

817 }

818

819

822 AST.add(adjustLoc(Loc));

823 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);

824 if (IsReadOnly)

825 ReadOnlyPtr.insert(Ptr);

826 }

827

828

829 void addStore(const MemoryLocation &Loc, Type *AccessTy) {

831 AST.add(adjustLoc(Loc));

832 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);

833 }

834

835

836

837

838

839

840

841

842 bool createCheckForAccess(RuntimePointerChecking &RtCheck,

843 MemAccessInfo Access, Type *AccessTy,

844 const DenseMap<Value *, const SCEV *> &Strides,

845 DenseMap<Value *, unsigned> &DepSetId,

846 Loop *TheLoop, unsigned &RunningDepId,

847 unsigned ASId, bool Assume);

848

849

850

851

852

853

854

855

856

857 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,

858 const DenseMap<Value *, const SCEV *> &Strides,

859 Value *&UncomputablePtr, bool AllowPartial);

860

861

862

863 void buildDependenceSets() {

864 processMemAccesses();

865 }

866

867

868

869

870

871

872 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }

873

874

875 void resetDepChecks(MemoryDepChecker &DepChecker) {

876 CheckDeps.clear();

878 }

879

880 const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }

881

882private:

883 typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

884

885

886

887 MemoryLocation adjustLoc(MemoryLocation Loc) const {

888

889

893 return Loc;

894 }

895

896

897 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {

898 if (!ScopeList)

899 return nullptr;

900

901

902

904 return LoopAliasScopes.contains(cast<MDNode>(Scope));

905 }))

906 return nullptr;

907

908 return ScopeList;

909 }

910

911

912

913 void processMemAccesses();

914

915

916

918

919

920 const Loop *TheLoop;

921

922

923 MemAccessInfoList CheckDeps;

924

925

926 SmallPtrSet<Value*, 16> ReadOnlyPtr;

927

928

929 BatchAAResults BAA;

930

931

932

933 AliasSetTracker AST;

934

935

936 const LoopInfo *LI;

937

938

939 DominatorTree &DT;

940

941

942

943

945

946

947

948

949

950

951

952

953 bool IsRTCheckAnalysisNeeded = false;

954

955

956 PredicatedScalarEvolution &PSE;

957

958 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;

959

960

961

962 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;

963};

964

965}

966

967

968

969static std::optional<int64_t>
970getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
971                    Value *Ptr, PredicatedScalarEvolution &PSE) {
972  if (AccessTy->isScalableTy()) {
973    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy

974 << "\n");

975 return std::nullopt;

976 }

977

978

979 if (Lp != AR->getLoop()) {

981 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";

982 if (Ptr)

983 dbgs() << *Ptr << " ";

984

985 dbgs() << "SCEV: " << *AR << "\n";

986 });

987 return std::nullopt;

988 }

989

990

992

993

994 const APInt *APStepVal;

997 dbgs() << "LAA: Bad stride - Not a constant strided ";

998 if (Ptr)

999 dbgs() << *Ptr << " ";

1000 dbgs() << "SCEV: " << *AR << "\n";

1001 });

1002 return std::nullopt;

1003 }

1004

1006 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);

1008

1009

1010 std::optional<int64_t> StepVal = APStepVal->trySExtValue();

1011 if (!StepVal)

1012 return std::nullopt;

1013

1014

1015 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);

1016}
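// Worked example (illustrative): with i32 accesses (AllocSize = 4), an
// AddRec step of -8 bytes yields a stride of -8 / 4 = -2 elements, while a
// step of 6 bytes is rejected because 6 % 4 != 0.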

1017

1018

1019

1020static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
1021                     Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
1022                     const DominatorTree &DT,
1023                     std::optional<int64_t> Stride = std::nullopt) {

1024

1026 return true;

1027

1029 return true;

1030

1031

1032

1033

1034

1035

1037 GEP && GEP->hasNoUnsignedSignedWrap()) {

1038

1039

1040 if (L->getHeader() == L->getLoopLatch() ||

1042 if (getLoadStorePointerOperand(U) != GEP)

1043 return false;

1044 BasicBlock *UserBB = cast<Instruction>(U)->getParent();

1045 return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);

1046 }))

1047 return true;

1048 }

1049

1050 if (!Stride)

1052 if (Stride) {

1053

1054

1055

1058 (Stride == 1 || Stride == -1))

1059 return true;

1060 }

1061

1062 if (Ptr && Assume) {

1065 << "LAA: Pointer: " << *Ptr << "\n"

1066 << "LAA: SCEV: " << *AR << "\n"

1067 << "LAA: Added an overflow assumption\n");

1068 return true;

1069 }

1070

1071 return false;

1072}

1073

1074static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1075                          function_ref<void(Value *)> AddPointer) {
1076  SmallPtrSet<Value *, 8> Visited;
1077  SmallVector<Value *, 8> WorkList;
1078  WorkList.push_back(StartPtr);
1079

1080 while (!WorkList.empty()) {

1081 Value *Ptr = WorkList.pop_back_val();
1082 if (!Visited.insert(Ptr).second)

1083 continue;

1085

1086

1087

1088 if (PN && InnermostLoop.contains(PN->getParent()) &&

1089 PN->getParent() != InnermostLoop.getHeader()) {

1091 } else

1092 AddPointer(Ptr);

1093 }

1094}

1095

1096

1097

1098

1099

1100

1101

1102

1103

1104

1105

1106

1107

1108

1109

1110

1111

1112

1113static void findForkedSCEVs(
1114    ScalarEvolution *SE, const Loop *L, Value *Ptr,
1115    SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
1116    unsigned Depth) {

1117

1118

1119

1120

1125 return;

1126 }

1127

1129

1132 };

1133

1134 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {

1135 switch (Opcode) {

1136 case Instruction::Add:

1138 case Instruction::Sub:

1140 default:

1141 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");

1142 }

1143 };

1144

1146 unsigned Opcode = I->getOpcode();

1147 switch (Opcode) {

1148 case Instruction::GetElementPtr: {

1150 Type *SourceTy = GEP->getSourceElementType();

1151

1152

1153 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {

1155 break;

1156 }

1161

1162

1163 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||

1164 any_of(OffsetScevs, UndefPoisonCheck);

1165

1166

1167

1168

1169 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)

1170 BaseScevs.push_back(BaseScevs[0]);

1171 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)

1172 OffsetScevs.push_back(OffsetScevs[0]);

1173 else {

1174 ScevList.emplace_back(Scev, NeedsFreeze);

1175 break;

1176 }

1177

1179

1180

1181

1182

1184

1185 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {

1188

1189

1193 }

1194 break;

1195 }

1196 case Instruction::Select: {

1198

1199

1200

1203 if (ChildScevs.size() == 2)

1205 else

1207 break;

1208 }

1209 case Instruction::PHI: {

1211

1212

1213

1214 if (I->getNumOperands() == 2) {

1217 }

1218 if (ChildScevs.size() == 2)

1220 else

1222 break;

1223 }

1224 case Instruction::Add:

1225 case Instruction::Sub: {

1230

1231

1232 bool NeedsFreeze =

1233 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);

1234

1235

1236

1237

1238 if (LScevs.size() == 2 && RScevs.size() == 1)

1240 else if (RScevs.size() == 2 && LScevs.size() == 1)

1242 else {

1243 ScevList.emplace_back(Scev, NeedsFreeze);

1244 break;

1245 }

1246

1247 for (auto [L, R] : zip(LScevs, RScevs))

1248 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),

1249 NeedsFreeze);

1250 break;

1251 }

1252 default:

1253

1254 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");

1256 break;

1257 }

1258}
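// Example (illustrative IR, hypothetical names): for
//   %sel = select i1 %c, ptr %a, ptr %b
//   %gep = getelementptr i32, ptr %sel, i64 %i
// the walk forks at the select and produces two SCEVs, {%a,+,4} and
// {%b,+,4}, so each possible pointer can get its own runtime-check bounds.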

1259

1260bool AccessAnalysis::createCheckForAccess(

1264 unsigned &RunningDepId, unsigned ASId, bool Assume) {

1268

1272 "Must have some runtime-check pointer candidates");

1273

1274

1275

1276 auto IsLoopInvariantOrAR =

1280 };

1281 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {

1282 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";

1283 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()

1284 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");

1285 } else {

1287 }

1288

1289

1290

1291 for (auto &P : RTCheckPtrs) {

1292

1294 continue;

1295

1297 if (!AR && Assume)

1300 return false;

1301

1302

1303

1304 if (RTCheckPtrs.size() == 1) {

1305 AR =

1307 P.setPointer(AR);

1308 }

1309

1310 if (isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,

1311 TheLoop, Assume, DT))

1312 return false;

1313 }

1314

1315 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {

1316

1317 unsigned DepId;

1318

1319 if (isDependencyCheckNeeded()) {

1321 unsigned &LeaderId = DepSetId[Leader];

1322 if (!LeaderId)

1323 LeaderId = RunningDepId++;

1324 DepId = LeaderId;

1325 } else

1326

1327 DepId = RunningDepId++;

1328

1329 bool IsWrite = Access.getInt();

1330 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,

1331 NeedsFreeze);

1332 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

1333 }

1334

1335 return true;

1336}

1337

1338bool AccessAnalysis::canCheckPtrAtRT(

1341 bool AllowPartial) {

1342

1343

1344 bool CanDoRT = true;

1345

1346 bool MayNeedRTCheck = false;

1347 if (!IsRTCheckAnalysisNeeded) return true;

1348

1349 bool IsDepCheckNeeded = isDependencyCheckNeeded();

1350

1351

1352

1353 unsigned ASId = 0;

1354 for (const auto &AS : AST) {

1355 int NumReadPtrChecks = 0;

1356 int NumWritePtrChecks = 0;

1357 bool CanDoAliasSetRT = true;

1358 ++ASId;

1359 auto ASPointers = AS.getPointers();

1360

1361

1362

1363 unsigned RunningDepId = 1;

1365

1367

1368

1369

1371 for (const Value *ConstPtr : ASPointers) {

1372 Value *Ptr = const_cast<Value *>(ConstPtr);

1373 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));

1374 if (IsWrite)

1375 ++NumWritePtrChecks;

1376 else

1377 ++NumReadPtrChecks;

1379 }

1380

1381

1382

1383 if (NumWritePtrChecks == 0 ||

1384 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {

1385 assert((ASPointers.size() <= 1 ||

1387 [this](const Value *Ptr) {

1388 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),

1389 true);

1390 return !DepCands.contains(AccessWrite);

1391 })) &&

1392 "Can only skip updating CanDoRT below, if all entries in AS "

1393 "are reads or there is at most 1 entry");

1394 continue;

1395 }

1396

1397 for (auto &Access : AccessInfos) {

1399 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,

1400 DepSetId, TheLoop, RunningDepId, ASId,

1401 false)) {

1402 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"

1403 << *Access.getPointer() << '\n');

1405 CanDoAliasSetRT = false;

1406 }

1407 }

1408 }

1409

1410

1411

1412

1413

1414

1415

1416

1417

1418

1419 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

1420

1421

1422

1423 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {

1424

1425

1426

1427 CanDoAliasSetRT = true;

1428 for (const auto &[Access, AccessTy] : Retries) {

1429 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,

1430 DepSetId, TheLoop, RunningDepId, ASId,

1431 true)) {

1432 CanDoAliasSetRT = false;

1433 UncomputablePtr = Access.getPointer();

1434 if (!AllowPartial)

1435 break;

1436 }

1437 }

1438 }

1439

1440 CanDoRT &= CanDoAliasSetRT;

1441 MayNeedRTCheck |= NeedsAliasSetRTCheck;

1442 ++ASId;

1443 }

1444

1445

1446

1447

1448

1449

1450 unsigned NumPointers = RtCheck.Pointers.size();

1451 for (unsigned i = 0; i < NumPointers; ++i) {

1452 for (unsigned j = i + 1; j < NumPointers; ++j) {

1453

1454 if (RtCheck.Pointers[i].DependencySetId ==

1455 RtCheck.Pointers[j].DependencySetId)

1456 continue;

1457

1458 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)

1459 continue;

1460

1463

1466 if (ASi != ASj) {

1468 dbgs() << "LAA: Runtime check would require comparison between"

1469 " different address spaces\n");

1470 return false;

1471 }

1472 }

1473 }

1474

1475 if (MayNeedRTCheck && (CanDoRT || AllowPartial))

1477

1479 << " pointer comparisons.\n");

1480

1481

1482

1483

1485

1486 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;

1487 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&

1488 "CanDoRTIfNeeded depends on RtCheck.Need");

1489 if (!CanDoRTIfNeeded && !AllowPartial)

1490 RtCheck.reset();

1491 return CanDoRTIfNeeded;

1492}

1493

1494void AccessAnalysis::processMemAccesses() {

1495

1496

1497

1498

1499 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");

1504 dbgs() << "\t" << *A.getPointer() << " ("

1505 << (A.getInt()

1506 ? "write"

1507 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"

1508 : "read"))

1509 << ")\n";

1510 });

1511

1512

1513

1514

1515

1516 for (const auto &AS : AST) {

1517

1518

1519

1520 auto ASPointers = AS.getPointers();

1521

1522 bool SetHasWrite = false;

1523

1524

1525

1526 using UnderlyingObjToAccessMap =
1527 DenseMap<std::pair<const Value *, Type *>, MemAccessInfo>;

1528 UnderlyingObjToAccessMap ObjToLastAccess;

1529

1530

1531 PtrAccessMap DeferredAccesses;

1532

1533

1534

1535 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {

1536 bool UseDeferred = SetIteration > 0;

1537 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

1538

1539 for (const Value *ConstPtr : ASPointers) {

1540 Value *Ptr = const_cast<Value *>(ConstPtr);

1541

1542

1543

1544 for (const auto &[AC, _] : S) {

1545 if (AC.getPointer() != Ptr)

1546 continue;

1547

1548 bool IsWrite = AC.getInt();

1549

1550

1551

1552 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;

1553 if (UseDeferred && !IsReadOnlyPtr)

1554 continue;

1555

1556

1557 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||

1558 S.contains(MemAccessInfo(Ptr, false))) &&

1559 "Alias-set pointer not in the access set?");

1560

1561 MemAccessInfo Access(Ptr, IsWrite);

1563

1564

1565

1566

1567

1568

1569 if (!UseDeferred && IsReadOnlyPtr) {

1570

1571

1572 DeferredAccesses.insert({Access, {}});

1573 continue;

1574 }

1575

1576

1577

1578

1579

1580 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {

1581 CheckDeps.push_back(Access);

1582 IsRTCheckAnalysisNeeded = true;

1583 }

1584

1585 if (IsWrite)

1586 SetHasWrite = true;

1587

1588

1589

1591 UOs = {};

1594 << "Underlying objects for pointer " << *Ptr << "\n");

1595 for (const Value *UnderlyingObj : UOs) {

1596

1597

1602 continue;

1603

1604 auto [It, Inserted] = ObjToLastAccess.try_emplace(

1605 {UnderlyingObj,

1608 if (!Inserted) {

1610 It->second = Access;

1611 }

1612

1613 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");

1614 }

1615 }

1616 }

1617 }

1618 }

1619}

1620

1621

1622std::optional<int64_t>

1626 bool Assume, bool ShouldCheckWrap) {

1629 return 0;

1630

1632

1634 if (Assume && !AR)

1636

1637 if (!AR) {

1638 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr

1639 << " SCEV: " << *PtrScev << "\n");

1640 return std::nullopt;

1641 }

1642

1643 std::optional<int64_t> Stride =

1645 if (!ShouldCheckWrap || !Stride)

1646 return Stride;

1647

1648 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))

1649 return Stride;

1650

1652 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "

1653 << *Ptr << " SCEV: " << *AR << "\n");

1654 return std::nullopt;

1655}

1656

1661 bool StrictCheck, bool CheckType) {

1662 assert(PtrA && PtrB && "Expected non-nullptr pointers.");

1663

1664

1665 if (PtrA == PtrB)

1666 return 0;

1667

1668

1669 if (CheckType && ElemTyA != ElemTyB)

1670 return std::nullopt;

1671

1674

1675

1676 if (ASA != ASB)

1677 return std::nullopt;

1678 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

1679

1680 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);

1682 DL, OffsetA, true);

1684 DL, OffsetB, true);

1685

1686 std::optional<int64_t> Val;

1687 if (PtrA1 == PtrB1) {

1688

1689

1692

1693 if (ASA != ASB)

1694 return std::nullopt;

1695

1696 IdxWidth = DL.getIndexSizeInBits(ASA);

1697 OffsetA = OffsetA.sextOrTrunc(IdxWidth);

1698 OffsetB = OffsetB.sextOrTrunc(IdxWidth);

1699

1700 OffsetB -= OffsetA;

1702 } else {

1703

1704 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);

1705 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);

1706 std::optional<APInt> Diff =
1707 SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);

1708 if (!Diff)

1709 return std::nullopt;

1710 Val = Diff->trySExtValue();

1711 }

1712

1713 if (!Val)

1714 return std::nullopt;

1715

1716 int64_t Size = DL.getTypeStoreSize(ElemTyA);

1717 int64_t Dist = *Val / Size;

1718

1719

1720

1721 if (!StrictCheck || Dist * Size == Val)

1722 return Dist;

1723 return std::nullopt;

1724}
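// Usage sketch (hypothetical values): for %q = getelementptr i32, ptr %p,
// i64 3, getPointersDiff(i32, %p, i32, %q, ...) returns 3, i.e. a distance
// in elements rather than bytes; with StrictCheck set, a byte distance that
// is not a multiple of the element store size yields std::nullopt instead.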

1725

1730 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&

1731 "Expected list of pointer operands.");

1732

1733

1734 Value *Ptr0 = VL[0];

1735

1736 using DistOrdPair = std::pair<int64_t, unsigned>;

1738 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);

1739 Offsets.emplace(0, 0);

1740 bool IsConsecutive = true;

1742 std::optional<int64_t> Diff =

1744 true);

1745 if (!Diff)

1746 return false;

1747

1748

1749 int64_t Offset = *Diff;

1750 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);

1751 if (!IsInserted)

1752 return false;

1753

1754 IsConsecutive &= std::next(It) == Offsets.end();

1755 }

1756 SortedIndices.clear();

1757 if (!IsConsecutive) {

1758

1760 for (auto [Idx, Off] : enumerate(Offsets))

1761 SortedIndices[Idx] = Off.second;

1762 }

1763 return true;

1764}
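// Usage sketch (hypothetical input): for pointers at element offsets
// {0, 2, 1} relative to VL[0], the offset set sorts to 0,1,2 and
// SortedIndices becomes {0, 2, 1}; for already-consecutive input the
// function leaves SortedIndices empty and just returns true.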

1765

1766

1771 if (!PtrA || !PtrB)

1772 return false;

1775 std::optional<int64_t> Diff =

1777 true, CheckType);

1778 return Diff == 1;

1779}

1780

1783 [this, SI](Value *Ptr) {

1784 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);

1785 InstMap.push_back(SI);

1786 ++AccessIdx;

1787 });

1788}

1789

1792 [this, LI](Value *Ptr) {

1793 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);

1794 InstMap.push_back(LI);

1795 ++AccessIdx;

1796 });

1797}

1798

1801 switch (Type) {

1806

1814 }

1816}

1817

1819 switch (Type) {

1825 return false;

1826

1830 return true;

1831 }

1833}

1834

1838

1840 switch (Type) {

1843 return true;

1844

1851 return false;

1852 }

1854}

1855

1856bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,

1858 unsigned CommonStride) {

1859

1860

1861

1862

1863

1864

1865

1866

1867

1868

1869

1870

1871 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;

1872

1873 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =

1875 MaxStoreLoadForwardSafeDistanceInBits);

1876

1877

1878 for (uint64_t VF = 2 * TypeByteSize;

1879 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {

1880

1881

1882 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {

1883 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);

1884 break;

1885 }

1886 }

1887

1888 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {

1890 dbgs() << "LAA: Distance " << Distance

1891 << " that could cause a store-load forwarding conflict\n");

1892 return true;

1893 }

1894

1895 if (CommonStride &&

1896 MaxVFWithoutSLForwardIssuesPowerOf2 <

1897 MaxStoreLoadForwardSafeDistanceInBits &&

1898 MaxVFWithoutSLForwardIssuesPowerOf2 !=

1900 uint64_t MaxVF =

1901 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);

1902 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

1903 MaxStoreLoadForwardSafeDistanceInBits =

1904 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);

1905 }

1906 return false;

1907}
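// Worked example (illustrative): a store feeding a load 16 bytes away with
// 4-byte elements forwards cleanly for 8- and 16-byte vectors (16 % VF == 0),
// but a 32-byte vector would straddle the 16-byte distance, so the maximum
// VF without a store-to-load forwarding stall is clamped to 16 bytes here.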

1908

1910 if (Status < S)

1911 Status = S;

1912}

1913

1914

1915

1916

1917

1918

1919

1920

1921

1922

1923

1924

1925

1926static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1927                                     const SCEV &MaxBTC, const SCEV &Dist,
1928                                     uint64_t MaxStride) {

1929

1930

1931

1932

1933

1934

1935

1936

1937

1938

1939

1940

1941

1942

1943

1944

1945

1946

1949

1950 const SCEV *CastedDist = &Dist;

1951 const SCEV *CastedProduct = Product;

1952 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());

1953 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());

1954

1955

1956

1957

1958 if (DistTypeSizeBits > ProductTypeSizeBits)

1960 else

1962

1963

1964

1967 return true;

1968

1969

1970

1974}

1975

1976

1977

1978

1979

1980

1981static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1982                                          uint64_t TypeByteSize) {
1983 assert(Stride > 1 && "The stride must be greater than 1");

1984 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");

1985 assert(Distance > 0 && "The distance must be non-zero");

1986

1987

1988 if (Distance % TypeByteSize)

1989 return false;

1990

1991

1992

1993

1994

1995

1996

1997

1998

1999

2000

2001

2002

2003

2004

2005

2006

2007 return Distance % Stride;

2008}
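// Worked example (illustrative): in
//   for (i = 0; i < 1024; i += 4)
//     A[i + 2] = A[i] + 1;
// the distance is 2 elements and the stride is 4, so the two access streams
// interleave without ever touching the same element: 2 % 4 != 0 means the
// accesses are independent.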

2009

2010bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,

2011 Type *SrcTy,

2012 const SCEV *Sink,

2013 Type *SinkTy) {

2014 const SCEV *BTC = PSE.getBackedgeTakenCount();

2015 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();

2016 ScalarEvolution &SE = *PSE.getSE();

2017 const auto &[SrcStart_, SrcEnd_] =

2019 &SE, &PointerBounds, DT, AC, LoopGuards);

2021 return false;

2022

2023 const auto &[SinkStart_, SinkEnd_] =

2025 &SE, &PointerBounds, DT, AC, LoopGuards);

2028 return false;

2029

2030 if (!LoopGuards)

2032

2034 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);

2036 return true;

2037

2038 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);

2039 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);

2041}

2042

2043std::variant<MemoryDepChecker::Dependence::DepType,
2044             MemoryDepChecker::DepDistanceStrideAndSizeInfo>

2045MemoryDepChecker::getDependenceDistanceStrideAndSize(

2046 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,

2047 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {

2048 const auto &DL = InnermostLoop->getHeader()->getDataLayout();

2049 auto &SE = *PSE.getSE();

2050 const auto &[APtr, AIsWrite] = A;

2051 const auto &[BPtr, BIsWrite] = B;

2052

2053

2054 if (!AIsWrite && !BIsWrite)

2056

2059

2060

2061 if (APtr->getType()->getPointerAddressSpace() !=

2062 BPtr->getType()->getPointerAddressSpace())

2064

2065 std::optional<int64_t> StrideAPtr = getPtrStride(

2066 PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);

2067 std::optional<int64_t> StrideBPtr = getPtrStride(

2068 PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);

2069

2070 const SCEV *Src = PSE.getSCEV(APtr);

2071 const SCEV *Sink = PSE.getSCEV(BPtr);

2072

2073

2074

2075

2076 if (StrideAPtr && *StrideAPtr < 0) {

2080 std::swap(StrideAPtr, StrideBPtr);

2081 }

2082

2083 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

2084

2085 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink

2086 << "\n");

2087 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst

2088 << ": " << *Dist << "\n");

2089

2090

2091

2092

2093

2094

2095

2096

2097 if (!StrideAPtr || !StrideBPtr) {

2098 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");

2100 }

2101

2102 int64_t StrideAPtrInt = *StrideAPtr;

2103 int64_t StrideBPtrInt = *StrideBPtr;

2104 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt

2105 << " Sink induction step: " << StrideBPtrInt << "\n");

2106

2107

2108 if (!StrideAPtrInt || !StrideBPtrInt)

2110

2111

2112

2113 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {

2115 dbgs() << "Pointer access with strides in different directions\n");

2117 }

2118

2119 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);

2120 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);

2121

2122

2123

2124 uint64_t ASz = DL.getTypeAllocSize(ATy);

2125 uint64_t BSz = DL.getTypeAllocSize(BTy);

2126 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;

2127

2128 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;

2129 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;

2130

2131 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);

2132

2133 std::optional<uint64_t> CommonStride;

2134 if (StrideAScaled == StrideBScaled)

2135 CommonStride = StrideAScaled;

2136

2137

2138

2140 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;

2141

2142

2144 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");

2146 }

2147

2148 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,

2149 TypeByteSize, AIsWrite, BIsWrite);

2150}
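// Example (illustrative): for a write of A[i] and a read of A[i - 2] with
// i32 elements, Dist = SE.getMinusSCEV(Sink, Src) is 8 bytes, both pointer
// strides are 1 element, so CommonStride = MaxStride = 4 bytes and
// TypeByteSize = 4 feed the distance checks in isDependent below.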

2151

2153MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,

2155 assert(AIdx < BIdx && "Must pass arguments in program order");

2156

2157

2158

2159

2160 auto CheckCompletelyBeforeOrAfter = [&]() {

2161 auto *APtr = A.getPointer();

2162 auto *BPtr = B.getPointer();

2165 const SCEV *Src = PSE.getSCEV(APtr);

2166 const SCEV *Sink = PSE.getSCEV(BPtr);

2167 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);

2168 };

2169

2170

2171

2172 auto Res =

2173 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);

2174 if (std::holds_alternative<Dependence::DepType>(Res)) {

2176 CheckCompletelyBeforeOrAfter())

2178 return std::get<Dependence::DepType>(Res);

2179 }

2180

2181 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =

2182 std::get<DepDistanceStrideAndSizeInfo>(Res);

2183 bool HasSameSize = TypeByteSize > 0;

2184

2185 ScalarEvolution &SE = *PSE.getSE();

2186 auto &DL = InnermostLoop->getHeader()->getDataLayout();

2187

2188

2189

2190

2191

2192

2193 if (HasSameSize &&

2195 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))

2197

2198

2199

2200 const APInt *APDist = nullptr;

2201 uint64_t ConstDist =

2203

2204

2205 if (APDist) {

2206

2207

2208 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&

2210 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");

2212 }

2213 } else {

2214 if (!LoopGuards)

2215 LoopGuards.emplace(
2216 ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));

2218 }

2219

2220

2223 if (HasSameSize) {

2224

2226 }

2227 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "

2228 "different type sizes\n");

2230 }

2231

2232 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);

2233

2234

2235

2236

2237

2238

2239

2240

2242 if (!ConstDist) {

2245 }

2246 if (!HasSameSize ||

2247 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {

2249 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");

2251 }

2252 }

2253

2254 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");

2256 }

2257

2259

2260 if (MinDistance <= 0) {

2263 }

2264

2265 if (!HasSameSize) {

2266 if (CheckCompletelyBeforeOrAfter())

2268 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "

2269 "different type sizes\n");

2271 }

2272

2277

2278 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

2279

2280

2281

2282

2283

2284

2285

2286

2287

2288

2289

2290

2291

2292

2293

2294

2295

2296

2297

2298

2299

2300

2301

2302

2303

2304

2305

2306

2307

2308

2309

2310

2311

2312

2313 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;

2314 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {

2315 if (!ConstDist) {

2316

2317

2318

2319

2322 }

2323 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "

2324 << MinDistance << '\n');

2326 }

2327

2328

2329

2330 if (MinDistanceNeeded > MinDepDistBytes) {

2331 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "

2332 << MinDistanceNeeded << " size in bytes\n");

2334 }

2335

2336 MinDepDistBytes =

2337 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);

2338

2339 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);

2341 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))

2343

2344 uint64_t MaxVF = MinDepDistBytes / MaxStride;

2345 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance

2346 << " with max VF = " << MaxVF << '\n');

2347

2348 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

2349 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {

2350

2351

2352

2353

2356 }

2357

2358 if (CheckCompletelyBeforeOrAfter())

2360

2361 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);

2363}

2364

2367

2368 MinDepDistBytes = -1;

2371 if (Visited.contains(CurAccess))

2372 continue;

2373

2374

2379

2380

2381 while (AI != AE) {

2382 Visited.insert(*AI);

2383 bool AIIsWrite = AI->getInt();

2384

2385

2387 (AIIsWrite ? AI : std::next(AI));

2388 while (OI != AE) {

2389

2390 auto &Acc = Accesses[*AI];

2391 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();

2392 I1 != I1E; ++I1)

2393

2394

2395 for (std::vector<unsigned>::iterator

2396 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),

2397 I2E = (OI == AI ? I1E : Accesses[*OI].end());

2398 I2 != I2E; ++I2) {

2399 auto A = std::make_pair(&*AI, *I1);

2400 auto B = std::make_pair(&*OI, *I2);

2401

2403 if (*I1 > *I2)

2405

2407 isDependent(*A.first, A.second, *B.first, B.second);

2409

2410

2411

2412

2413

2414 if (RecordDependences) {

2416 Dependences.emplace_back(A.second, B.second, Type);

2417

2419 RecordDependences = false;

2420 Dependences.clear();

2422 << "Too many dependences, stopped recording\n");

2423 }

2424 }

2426 return false;

2427 }

2428 ++OI;

2429 }

2430 ++AI;

2431 }

2432 }

2433

2434 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");

2436}

2437

2441 auto I = Accesses.find(Access);

2443 if (I != Accesses.end()) {

2444 transform(I->second, std::back_inserter(Insts),

2445 [&](unsigned Idx) { return this->InstMap[Idx]; });

2446 }

2447

2448 return Insts;

2449}

2450

2452 "NoDep",

2453 "Unknown",

2454 "IndirectUnsafe",

2455 "Forward",

2456 "ForwardButPreventsForwarding",

2457 "Backward",

2458 "BackwardVectorizable",

2459 "BackwardVectorizableButPreventsForwarding"};

2460

2468

2469bool LoopAccessInfo::canAnalyzeLoop() {

2470

2473 << TheLoop->getLocStr() << "\n");

2474

2475

2477 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");

2478 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";

2479 return false;

2480 }

2481

2482

2485 dbgs() << "LAA: loop control flow is not understood by analyzer\n");

2486 recordAnalysis("CFGNotUnderstood")

2487 << "loop control flow is not understood by analyzer";

2488 return false;

2489 }

2490

2491

2492

2493

2496 recordAnalysis("CantComputeNumberOfIterations")

2497 << "could not determine number of loop iterations";

2498 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");

2499 return false;

2500 }

2501

2502 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "

2504 return true;

2505}

2506

2507bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,

2508 const TargetLibraryInfo *TLI,

2509 DominatorTree *DT) {

2510

2513 SmallPtrSet<MDNode *, 8> LoopAliasScopes;

2514

2515

2516 unsigned NumReads = 0;

2517 unsigned NumReadWrites = 0;

2518

2519 bool HasComplexMemInst = false;

2520

2521

2522 HasConvergentOp = false;

2523

2524 PtrRtChecking->Pointers.clear();

2525 PtrRtChecking->Need = false;

2526

2528

2529 const bool EnableMemAccessVersioningOfLoop =

2532

2533

2534

2535 LoopBlocksRPO RPOT(TheLoop);

2536 RPOT.perform(LI);

2537 for (BasicBlock *BB : RPOT) {

2538

2539

2540 for (Instruction &I : *BB) {

2543 HasConvergentOp = true;

2544 }

2545

2546

2547

2548 if (HasComplexMemInst && HasConvergentOp)

2549 return false;

2550

2551

2552 if (HasComplexMemInst)

2553 continue;

2554

2555

2557 for (Metadata *Op : Decl->getScopeList()->operands())

2559

2560

2561

2562

2565 continue;

2566

2567

2568

2569

2570 if (I.mayReadFromMemory()) {

2571 auto hasPointerArgs = [](CallBase *CB) {

2572 return any_of(CB->args(), [](Value const *Arg) {

2573 return Arg->getType()->isPointerTy();

2574 });

2575 };

2576

2577

2578

2579

2582 continue;

2583

2585 if (!Ld) {

2586 recordAnalysis("CantVectorizeInstruction", Ld)

2587 << "instruction cannot be vectorized";

2588 HasComplexMemInst = true;

2589 continue;

2590 }

2591 if (!Ld->isSimple() && !IsAnnotatedParallel) {

2592 recordAnalysis("NonSimpleLoad", Ld)

2593 << "read with atomic ordering or volatile read";

2594 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");

2595 HasComplexMemInst = true;

2596 continue;

2597 }

2598 NumLoads++;

2601 if (EnableMemAccessVersioningOfLoop)

2602 collectStridedAccess(Ld);

2603 continue;

2604 }

2605

2606

2607 if (I.mayWriteToMemory()) {

2609 if (!St) {

2610 recordAnalysis("CantVectorizeInstruction", St)

2611 << "instruction cannot be vectorized";

2612 HasComplexMemInst = true;

2613 continue;

2614 }

2615 if (!St->isSimple() && !IsAnnotatedParallel) {

2616 recordAnalysis("NonSimpleStore", St)

2617 << "write with atomic ordering or volatile write";

2618 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");

2619 HasComplexMemInst = true;

2620 continue;

2621 }

2622 NumStores++;

2625 if (EnableMemAccessVersioningOfLoop)

2626 collectStridedAccess(St);

2627 }

2628 }

2629 }

2630

2631 if (HasComplexMemInst)

2632 return false;

2633

2634

2635

2636

2637

2638

2639 if (!Stores.size()) {

2640 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");

2641 return true;

2642 }

2643

2645 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,

2646 LoopAliasScopes);

2647

2648

2649

2650

2651

2652

2653 SmallSet<std::pair<Value *, Type *>, 16> Seen;

2654

2655

2656

2657 SmallPtrSet<Value *, 16> UniformStores;

2658

2659 for (StoreInst *ST : Stores) {

2660 Value *Ptr = ST->getPointerOperand();

2661

2662 if (isInvariant(Ptr)) {

2663

2664 StoresToInvariantAddresses.push_back(ST);

2665 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=

2666 !UniformStores.insert(Ptr).second;

2667 }

2668

2669

2670

2672 if (Seen.insert({Ptr, AccessTy}).second) {

2673 ++NumReadWrites;

2674

2676

2677

2678

2679 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))

2681

2683 [&Accesses, AccessTy, Loc](Value *Ptr) {

2684 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);

2685 Accesses.addStore(NewLoc, AccessTy);

2686 });

2687 }

2688 }

2689

2690 if (IsAnnotatedParallel) {

2692 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "

2693 << "checks.\n");

2694 return true;

2695 }

2696

2697 for (LoadInst *LD : Loads) {

2698 Value *Ptr = LD->getPointerOperand();

2699

2700

2701

2702

2703

2704

2705

2706

2707 bool IsReadOnlyPtr = false;

2709 if (Seen.insert({Ptr, AccessTy}).second ||

2710 getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,

2711 true)) {

2712 ++NumReads;

2713 IsReadOnlyPtr = true;

2714 }

2715

2716

2717

2718 if (UniformStores.contains(Ptr)) {

2719 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "

2720 "load and uniform store to the same address!\n");

2721 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;

2722 }

2723

2725

2726

2727

2728 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))

2730

2732 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {

2733 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);

2734 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);

2735 });

2736 }

2737

2738

2739

2740 if (NumReadWrites == 1 && NumReads == 0) {

2741 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");

2742 return true;

2743 }

2744

2745

2746

2747 Accesses.buildDependenceSets();

2748

2749

2750

2751 Value *UncomputablePtr = nullptr;

2752 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(

2753 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);

2754 if (!HasCompletePtrRtChecking) {

2756 recordAnalysis("CantIdentifyArrayBounds", I)

2757 << "cannot identify array bounds";

2758 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "

2759 << "the array bounds.\n");

2760 return false;

2761 }

2762

2764 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");

2765

2766 bool DepsAreSafe = true;

2767 if (Accesses.isDependencyCheckNeeded()) {

2768 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");

2769 DepsAreSafe =

2770 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());

2771

2773 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

2774

2775

2776 Accesses.resetDepChecks(*DepChecker);

2777

2778 PtrRtChecking->reset();

2779 PtrRtChecking->Need = true;

2780

2781 UncomputablePtr = nullptr;

2782 HasCompletePtrRtChecking =

2783 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,

2784 UncomputablePtr, AllowPartial);

2785

2786

2787 if (!HasCompletePtrRtChecking) {

2789 recordAnalysis("CantCheckMemDepsAtRunTime", I)

2790 << "cannot check memory dependencies at runtime";

2791 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");

2792 return false;

2793 }

2794 DepsAreSafe = true;

2795 }

2796 }

2797

2798 if (HasConvergentOp) {

2799 recordAnalysis("CantInsertRuntimeCheckWithConvergent")

2800 << "cannot add control dependency to convergent operation";

2801 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "

2802 "would be needed with a convergent operation\n");

2803 return false;

2804 }

2805

2806 if (DepsAreSafe) {

2808 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"

2809 << (PtrRtChecking->Need ? "" : " don't")

2810 << " need runtime memory checks.\n");

2811 return true;

2812 }

2813

2814 emitUnsafeDependenceRemark();

2815 return false;

2816}

2817

2818void LoopAccessInfo::emitUnsafeDependenceRemark() {

2819 const auto *Deps = getDepChecker().getDependences();

2820 if (!Deps)

2821 return;

2822 const auto *Found =

2823 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {

2826 });

2827 if (Found == Deps->end())

2828 return;

2829 MemoryDepChecker::Dependence Dep = *Found;

2830

2831 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");

2832

2833

2834 bool HasForcedDistribution = false;

2835 std::optional<const MDOperand *> Value =

2838 const MDOperand *Op = *Value;

2841 }

2842

2843 const std::string Info =

2844 HasForcedDistribution

2845 ? "unsafe dependent memory operations in loop."

2846 : "unsafe dependent memory operations in loop. Use "

2847 "#pragma clang loop distribute(enable) to allow loop distribution "

2848 "to attempt to isolate the offending operations into a separate "

2849 "loop";

2850 OptimizationRemarkAnalysis &R =

2851 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;

2852

2853 switch (Dep.Type) {

2859 R << "\nBackward loop carried data dependence.";

2860 break;

2862 R << "\nForward loop carried data dependence that prevents "

2863 "store-to-load forwarding.";

2864 break;

2866 R << "\nBackward loop carried data dependence that prevents "

2867 "store-to-load forwarding.";

2868 break;

2870 R << "\nUnsafe indirect dependence.";

2871 break;

2873 R << "\nUnknown data dependence.";

2874 break;

2875 }

2876

2877 if (Instruction *I = Dep.getSource(getDepChecker())) {

2878 DebugLoc SourceLoc = I->getDebugLoc();

2880 SourceLoc = DD->getDebugLoc();

2881 if (SourceLoc)

2882 R << " Memory location is the same as accessed at "

2883 << ore::NV("Location", SourceLoc);

2884 }

2885}

2886

2888 const Loop *TheLoop,

2890 assert(TheLoop->contains(BB) && "Unknown block used");

2891

2892

2893 const BasicBlock *Latch = TheLoop->getLoopLatch();

2894 return !DT->dominates(BB, Latch);

2895}
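// Example (illustrative): in
//   for (...) { if (c) A[i] = 0; }
// the block containing the store does not dominate the loop latch, so it
// executes conditionally and needs predication when vectorized.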

2896

2899 assert(!Report && "Multiple reports generated");

2900

2903

2904 if (I) {

2905 CodeRegion = I->getParent();

2906

2907

2908 if (I->getDebugLoc())

2909 DL = I->getDebugLoc();

2910 }

2911

2912 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,

2913 DL, CodeRegion);

2914 return *Report;

2915}

2916

2918 auto *SE = PSE->getSE();

2919 if (TheLoop->isLoopInvariant(V))

2920 return true;

2922 return false;

2925}

2926

2927

2928

2929static Value *getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE,
2930                                       Loop *Lp) {
2931  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
2932  if (!GEP)
2933    return Ptr;

2934

2936 for (const Use &U : GEP->operands()) {

2938 if (V == Ptr)

2939 V = U;

2940 else

2941

2942 return Ptr;

2943 }

2944 }

2945 return V;

2946}

2947

2948

2949

2952 if (!PtrTy)

2953 return nullptr;

2954

2955

2956

2957

2958 Value *OrigPtr = Ptr;

2959

2962

2963 if (Ptr != OrigPtr)

2964

2966 V = C->getOperand();

2967

2969 return nullptr;

2970

2971

2972

2974 return nullptr;

2975

2976

2978 return V;

2979

2982 return V;

2983

2984 return nullptr;

2985}

2986

2987void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {

2989 if (!Ptr)

2990 return;

2991

2992

2993

2994

2995

2996

2997

2999 if (!StrideExpr)

3000 return;

3001

3003 return;

3004

3005 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "

3006 "versioning:");

3007 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");

3008

3010 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");

3011 return;

3012 }

3013

3014

3015

3016

3017

3018

3019

3020

3021

3022

3023

3024

3025

3026

3027 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();

3028

3029

3030

3031

3033 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());

3034 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());

3035 const SCEV *CastedStride = StrideExpr;

3036 const SCEV *CastedBECount = MaxBTC;

3037 ScalarEvolution *SE = PSE->getSE();

3038 if (BETypeSizeBits >= StrideTypeSizeBits)

3040 else

3042 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);

3043

3044

3045

3048 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "

3049 "Stride==1 predicate will imply that the loop executes "

3050 "at most once.\n");

3051 return;

3052 }

3053 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

3054

3055

3056

3057 const SCEV *StrideBase = StrideExpr;

3059 StrideBase = C->getOperand();

3061}

3062

3069 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {

3070 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();

3071 if (TTI && TTI->enableScalableVectorization())

3072

3073

3074 MaxTargetVectorWidthInBits =

3076

3077 DepChecker = std::make_unique<MemoryDepChecker>(

3078 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);

3079 PtrRtChecking =

3080 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);

3081 if (canAnalyzeLoop())

3082 CanVecMem = analyzeLoop(AA, LI, TLI, DT);

3083}

3084

3086 if (CanVecMem) {

3087 OS.indent(Depth) << "Memory dependences are safe";

3090 OS << " with a maximum safe vector width of "

3094 OS << ", with a maximum safe store-load forward width of " << SLDist

3095 << " bits";

3096 }

3097 if (PtrRtChecking->Need)

3098 OS << " with run-time checks";

3099 OS << "\n";

3100 }

3101

3102 if (HasConvergentOp)

3103 OS.indent(Depth) << "Has convergent operation in loop\n";

3104

3105 if (Report)

3106 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

3107

3108 if (auto *Dependences = DepChecker->getDependences()) {

3110 for (const auto &Dep : *Dependences) {

3111 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());

3112 OS << "\n";

3113 }

3114 } else

3115 OS.indent(Depth) << "Too many dependences, not recorded\n";

3116

3117

3118 PtrRtChecking->print(OS, Depth);

3119 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)

3120 OS.indent(Depth) << "Generated run-time checks are incomplete\n";

3121 OS << "\n";

3122

3124 << "Non vectorizable stores to invariant address were "

3125 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||

3126 HasLoadStoreDependenceInvolvingLoopInvariantAddress

3127 ? ""

3128 : "not ")

3129 << "found in loop.\n";

3130

3131 OS.indent(Depth) << "SCEV assumptions:\n";

3132 PSE->getPredicate().print(OS, Depth);

3133

3134 OS << "\n";

3135

3136 OS.indent(Depth) << "Expressions re-written:\n";

3137 PSE->print(OS, Depth);

3138}

3139

3141 bool AllowPartial) {

3142 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);

3143

3144

3145

3146 if (Inserted || It->second->hasAllowPartial() != AllowPartial)

3147 It->second = std::make_unique(&L, &SE, TTI, TLI, &AA, &DT,

3148 &LI, AC, AllowPartial);

3149

3150 return *It->second;

3151}

3153

3154

3155

3156

3157 for (const auto &[L, LAI] : LoopAccessInfoMap) {

3158 if (LAI->getRuntimePointerChecking()->getChecks().empty() &&

3159 LAI->getPSE().getPredicate().isAlwaysTrue())

3160 continue;

3161 LoopAccessInfoMap.erase(L);

3162 }

3163}

3164

3167 FunctionAnalysisManager::Invalidator &Inv) {

3168

3171

3172 return true;

3173

3174

3175

3176

3177 return Inv.invalidate<AAManager>(F, PA) ||

3181}

3182

3194


static const X86InstrFMA3Group Groups[]

A manager for alias analyses.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

APInt abs() const

Get the absolute value.

LLVM_ABI APInt sextOrTrunc(unsigned width) const

Sign extend or truncate to width.

std::optional< int64_t > trySExtValue() const

Get sign extended value if possible.

int64_t getSExtValue() const

Get sign extended value.

This templated class represents "all analyses that operate over " (e....

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

A function analysis which provides an AssumptionCache.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

bool isNoBuiltin() const

Return true if the call should not be treated as a call to a builtin.

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

bool isConvergent() const

Determine if the invoke is convergent.

@ ICMP_UGE

unsigned greater or equal

@ ICMP_SGE

signed greater or equal

@ ICMP_ULE

unsigned less or equal

static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

A parsed version of the target data layout string in and methods for querying it.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

iterator_range< member_iterator > members(const ECValue &ECV) const

bool contains(const ElemTy &V) const

Returns true if V is contained an equivalence class.

const ECValue & insert(const ElemTy &Data)

insert - Insert a new value into the union/find set, ignoring the request if the value already exists...

member_iterator member_end() const

const ElemTy & getLeaderValue(const ElemTy &V) const

getLeaderValue - Return the leader for the specified value that is in the set.

member_iterator findLeader(const ElemTy &V) const

findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.

member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)

union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...

bool hasOptSize() const

Optimize this function for size (-Os) or minimum size (-Oz).

PointerType * getType() const

Global values are always pointers.

Class to represent integer types.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

An instruction for reading from memory.

Value * getPointerOperand()

static constexpr LocationSize beforeOrAfterPointer()

Any location before or after the base pointer (but still within the underlying object).

This analysis provides dependence information for the memory accesses of a loop.

LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)

Definition LoopAccessAnalysis.cpp:3183

LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)

Definition LoopAccessAnalysis.cpp:3165

LLVM_ABI void clear()

Definition LoopAccessAnalysis.cpp:3152

LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)

Definition LoopAccessAnalysis.cpp:3140

Drive the analysis of memory accesses in the loop.

const MemoryDepChecker & getDepChecker() const

the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...

LLVM_ABI bool isInvariant(Value *V) const

Returns true if value V is loop invariant.

Definition LoopAccessAnalysis.cpp:2917

LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const

Print the information about the memory accesses in the loop.

Definition LoopAccessAnalysis.cpp:3085

static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)

Return true if the block BB needs to be predicated in order for the loop to be vectorized.

Definition LoopAccessAnalysis.cpp:2887

LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)

Definition LoopAccessAnalysis.cpp:3063

Analysis pass that exposes the LoopInfo for a function.

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

unsigned getNumBackEdges() const

Calculate the number of back edges to the loop header.

BlockT * getHeader() const

LoopT * getParentLoop() const

Return the parent loop if it exists or nullptr for top level loops.

Represents a single loop in the control flow graph.

std::string getLocStr() const

Return a string containing the debug location of the loop (file name + line number if present,...

bool isAnnotatedParallel() const

Returns true if the loop is annotated parallel.

DebugLoc getStartLoc() const

Return the debug location of the start of this loop.

ArrayRef< MDOperand > operands() const

Checks memory dependences among accesses to the same underlying object to determine whether there vec...

ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const

Return the program order indices for the access location (Ptr, IsWrite).

bool isSafeForAnyStoreLoadForwardDistances() const

Return true if there are no store-load forwarding dependencies.

bool isSafeForAnyVectorWidth() const

Return true if the number of elements that are safe to operate on simultaneously is not bounded.

LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps)

Check whether the dependencies between the accesses are safe, and records the dependence information ...

Definition LoopAccessAnalysis.cpp:2365

EquivalenceClasses< MemAccessInfo > DepCandidates

Set of potential dependent memory accesses.

bool shouldRetryWithRuntimeChecks() const

In same cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...

const Loop * getInnermostLoop() const

uint64_t getMaxSafeVectorWidthInBits() const

Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...

bool isSafeForVectorization() const

No memory dependence was encountered that would inhibit vectorization.

SmallVector< MemAccessInfo, 8 > MemAccessInfoList

LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const

Find the set of instructions that read or write via Ptr.

Definition LoopAccessAnalysis.cpp:2439

VectorizationSafetyStatus

Type to keep track of the status of the dependence check.

@ PossiblySafeWithRtChecks

LLVM_ABI void addAccess(StoreInst *SI)

Register the location (instructions are given increasing numbers) of a write access.

Definition LoopAccessAnalysis.cpp:1781

PointerIntPair< Value *, 1, bool > MemAccessInfo

uint64_t getStoreLoadForwardSafeDistanceInBits() const

Return safe power-of-2 number of elements, which do not prevent store-load forwarding,...

Representation for a specific memory location.

static LLVM_ABI MemoryLocation get(const LoadInst *LI)

Return a location with information about the memory reference by the given instruction.

LocationSize Size

The maximum size of the location, in address-units, or UnknownSize if the size is not known.

AAMDNodes AATags

The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...

const Value * Ptr

The address of the start of the location.

PointerIntPair - This class implements a pair of a pointer and small integer.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

LLVM_ABI void addPredicate(const SCEVPredicate &Pred)

Adds a new predicate.

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)

Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.

LLVM_ABI void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)

Proves that V doesn't overflow by adding SCEV predicate.

LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)

Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.

LLVM_ABI const SCEV * getBackedgeTakenCount()

Get the (predicated) backedge count for the analyzed loop.

LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()

Get the (predicated) symbolic max backedge count for the analyzed loop.

LLVM_ABI const SCEV * getSCEV(Value *V)

Returns the SCEV expression of V, in the context of the current SCEV predicate.

A set of analyses that are preserved following a run of a transformation pass.

PreservedAnalysisChecker getChecker() const

Build a checker for this PreservedAnalyses and the specified analysis type.

Holds information about the memory runtime legality checks to verify that a group of pointers do not ...

bool Need

This flag indicates if we need to add the runtime check.

void reset()

Reset the state of the pointer runtime information.

unsigned getNumberOfChecks() const

Returns the number of run-time checks required according to needsChecking.

LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const

Print Checks.

Definition LoopAccessAnalysis.cpp:761

LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const

Decide if we need to add a check between two groups of pointers, according to needsChecking.

Definition LoopAccessAnalysis.cpp:547

LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const

Print the list run-time memory checks necessary.

Definition LoopAccessAnalysis.cpp:780

SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups

Holds a partitioning of pointers into "check groups".

LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)

Generate the checks and store it.

Definition LoopAccessAnalysis.cpp:540

friend struct RuntimeCheckingPtrGroup

static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)

Check if pointers are in the same partition.

Definition LoopAccessAnalysis.cpp:729

SmallVector< PointerInfo, 2 > Pointers

Information about the pointers that may require checking.

LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)

Insert a pointer and calculate the start and end SCEVs.

Definition LoopAccessAnalysis.cpp:401

This node represents a polynomial recurrence on the trip count of the specified loop.

const SCEV * getStart() const

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

const Loop * getLoop() const

This class represents a constant integer value.

ConstantInt * getValue() const

const APInt & getAPInt() const

NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const

This class represents an analyzed expression in the program.

LLVM_ABI bool isZero() const

Return true if the expression is a constant zero.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

Analysis pass that exposes the ScalarEvolution for a function.

static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)

Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...

The main scalar evolution driver.

const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)

When successful, this returns a SCEVConstant that is greater than or equal to (i.e.

LLVM_ABI bool isKnownNonNegative(const SCEV *S)

Test if the given expression is known to be non-negative.

LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

Return the SCEV object corresponding to -V.

LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const

LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)

LLVM_ABI bool isKnownNonPositive(const SCEV *S)

Test if the given expression is known to be non-positive.

LLVM_ABI bool isKnownNegative(const SCEV *S)

Test if the given expression is known to be negative.

LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)

LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)

Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?

LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)

LLVM_ABI const SCEV * getConstant(ConstantInt *V)

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)

Return a SCEV corresponding to a conversion of the input value to the specified type.

const SCEV * getOne(Type *Ty)

Return a SCEV for the constant 1 of a specific type.

LLVM_ABI const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

LLVM_ABI bool isKnownPositive(const SCEV *S)

Test if the given expression is known to be positive.

LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)

LLVM_ABI bool isSCEVable(Type *Ty) const

Test if values of the given type are analyzable within the SCEV framework.

LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const

Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...

LLVM_ABI const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)

APInt getSignedRangeMin(const SCEV *S)

Determine the min of the signed range for a particular SCEV.

LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)

Return an expression for the store size of StoreTy that is type IntTy.

LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)

Return a SCEV corresponding to a conversion of the input value to the specified type.

LLVM_ABI const SCEV * getCouldNotCompute()

LLVM_ABI const SCEV * getPointerBase(const SCEV *V)

Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...

LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)

Try to apply information from loop guards for L to Expr.

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)

Return an expression for a TypeSize.

LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)

Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)

Return a SCEV corresponding to a conversion of the input value to the specified type.

LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test if the given expression is known to satisfy the condition described by Pred, LHS,...

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

bool contains(const T &V) const

Check if the SmallSet contains the given element.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

Provides information about what library functions are available for the current target.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isVectorTy() const

True if this is an instance of VectorType.

bool isPointerTy() const

True if this is an instance of PointerType.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

A Use represents the edge between a Value definition and its users.

static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)

Retrieve all the VFInfo instances associated to the CallInst CI.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI bool canBeFreed() const

Return true if the memory object referred to by V can by freed in the scope for which the SSA value d...

LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const

Accumulate the constant offset this value has compared to a base pointer.

LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const

Returns the number of bytes known to be dereferenceable for the pointer value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

constexpr ScalarTy getFixedValue() const

An efficient, type-erasing, non-owning reference to a callable.

This class implements an extremely fast bulk output stream that can only output to a stream.

raw_ostream & indent(unsigned NumSpaces)

indent - Insert 'NumSpaces' spaces.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Abstract Attribute helper functions.

@ C

The default llvm calling convention, compatible with C.

bool match(Val *V, const Pattern &P)

bind_cst_ty m_scev_APInt(const APInt *&C)

Match an SCEV constant and bind it to an APInt.

is_undef_or_poison m_scev_UndefOrPoison()

Match an SCEVUnknown wrapping undef or poison.

class_match< const SCEVConstant > m_SCEVConstant()

specificloop_ty m_SpecificLoop(const Loop *L)

SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)

specificscev_ty m_scev_Specific(const SCEV *S)

Match if we have a specific specified SCEV.

class_match< const SCEV > m_SCEV()

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)

Check whether Metadata has a Value.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

LLVM_ABI bool willNotFreeBetween(const Instruction *Assume, const Instruction *CtxI)

Returns true, if no instruction between Assume and CtxI may free memory and the function is marked as...

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})

Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...

LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)

Return true if it is valid to use the assumptions provided by an assume intrinsic,...

LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

unsigned getPointerAddressSpace(const Type *T)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for loop.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

const Value * getPointerOperand(const Value *V)

A helper function that returns the pointer operand of a load, store or GEP instruction.

auto dyn_cast_or_null(const Y &Val)

OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)

Wrapper function around std::transform to apply a function to a range and store the result elsewhere.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)

Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)

Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...

Definition LoopAccessAnalysis.cpp:1657

LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)

Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...

Definition LoopAccessAnalysis.cpp:1726

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)

Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...

Definition LoopAccessAnalysis.cpp:155

LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)

Returns true if the memory operations A and B are consecutive.

Definition LoopAccessAnalysis.cpp:1767

DWARFExpression::Operation Op

LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

T bit_floor(T Value)

Returns the largest integral power of two no greater than Value if Value is nonzero.

LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)

This method is similar to getUnderlyingObject except that it can look through phi and select instruct...

LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)

Calculate Start and End points of memory access using exact backedge taken count BTC if computable or...

Definition LoopAccessAnalysis.cpp:318

LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

Definition LoopAccessAnalysis.cpp:1623

Implement std::hash so that hash_code can be used in STL containers.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

IR Values for the lower and upper bounds of a pointer evolution.

MDNode * Scope

The tag for alias scope specification (used with noalias).

MDNode * TBAA

The tag for type-based alias analysis.

MDNode * NoAlias

The tag specifying the noalias scope.

A special type used by analysis passes to provide an address that identifies that particular analysis...

Instruction * getDestination(const MemoryDepChecker &DepChecker) const

Return the destination instruction of the dependence.

DepType Type

The type of the dependence.

unsigned Destination

Index of the destination of the dependence in the InstMap vector.

LLVM_ABI bool isPossiblyBackward() const

May be a lexically backward dependence type (includes Unknown).

Definition LoopAccessAnalysis.cpp:1835

Instruction * getSource(const MemoryDepChecker &DepChecker) const

Return the source instruction of the dependence.

LLVM_ABI bool isForward() const

Lexically forward dependence.

Definition LoopAccessAnalysis.cpp:1839

LLVM_ABI bool isBackward() const

Lexically backward dependence.

Definition LoopAccessAnalysis.cpp:1818

LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const

Print the dependence.

Definition LoopAccessAnalysis.cpp:2461

unsigned Source

Index of the source of the dependence in the InstMap vector.

DepType

The type of the dependence.

@ BackwardVectorizableButPreventsForwarding

@ ForwardButPreventsForwarding

static LLVM_ABI const char * DepName[]

String version of the types.

static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)

Dependence types that don't prevent vectorization.

Definition LoopAccessAnalysis.cpp:1800

Represent one information held inside an operand bundle of an llvm.assume.

unsigned AddressSpace

Address space of the involved pointers.

LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)

Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.

Definition LoopAccessAnalysis.cpp:566

bool NeedsFreeze

Whether the pointer needs to be frozen after expansion, e.g.

LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)

Create a new pointer checking group containing a single pointer, with index Index in RtCheck.

Definition LoopAccessAnalysis.cpp:183

const SCEV * High

The SCEV expression which represents the upper bound of all the pointers in this group.

SmallVector< unsigned, 2 > Members

Indices of all the pointers that constitute this grouping.

const SCEV * Low

The SCEV expression which represents the lower bound of all the pointers in this group.

bool IsWritePtr

Holds the information if this pointer is used for writing to memory.

unsigned DependencySetId

Holds the id of the set of pointers that could be dependent because of a shared underlying object.

unsigned AliasSetId

Holds the id of the disjoint alias set to which this pointer belongs.

static LLVM_ABI const unsigned MaxVectorWidth

Maximum SIMD width.

static LLVM_ABI unsigned VectorizationFactor

VF as overridden by the user.

static LLVM_ABI unsigned RuntimeMemoryCheckThreshold

\When performing memory disambiguation checks at runtime do not make more than this number of compari...

static LLVM_ABI bool isInterleaveForced()

True if force-vector-interleave was specified by the user.

Definition LoopAccessAnalysis.cpp:151

static LLVM_ABI unsigned VectorizationInterleave

Interleave factor as overridden by the user.

static LLVM_ABI bool HoistRuntimeChecks

Function object to check whether the first component of a container supported by std::get (like std::...