LLVM: lib/Analysis/LoopAccessAnalysis.cpp Source File

1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence that was originally
10// developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13

61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>

68

69using namespace llvm;

71

72#define DEBUG_TYPE "loop-accesses"

73

74static cl::opt<unsigned, true> VectorizationFactor(
75    "force-vector-width", cl::Hidden,
76    cl::desc("Sets the SIMD width. Zero is autoselect."),
77    cl::location(VectorizerParams::VectorizationFactor));
79

80static cl::opt<unsigned, true> VectorizationInterleave(
81    "force-vector-interleave", cl::Hidden,
82    cl::desc("Sets the vectorization interleave count. "
83             "Zero is autoselect."),
84    cl::location(VectorizerParams::VectorizationInterleave));
87

89 "runtime-memory-check-threshold", cl::Hidden,

90 cl::desc("When performing memory disambiguation checks at runtime do not "

91 "generate more than this number of comparisons (default = 8)."),

94

95/// The maximum iterations used to merge memory checks.
96static cl::opt<unsigned> MemoryCheckMergeThreshold(
97    "memory-check-merge-threshold", cl::Hidden,
98    cl::desc("Maximum number of comparisons done when trying to merge "
99             "runtime memory checks. (default = 100)"),
100    cl::init(100));
101

102
103/// Maximum SIMD width.
104const unsigned VectorizerParams::MaxVectorWidth = 64;
105
106/// We collect dependences up to this threshold.
107static cl::opt<unsigned> MaxDependences(
108    "max-dependences", cl::Hidden,
109    cl::desc("Maximum number of dependences collected by "
110             "loop-access analysis (default = 100)"),
111    cl::init(100));

112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114///   for (i = 0; i < N; ++i)
115///     A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118///    if (Stride1 == 1 && Stride2 == 1) {
119///      for (i = 0; i < N; i+=4)
120///       A[i:i+3] += ...
121///    } else
122///      ...
123static cl::opt<bool> EnableMemAccessVersioning(
124    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125    cl::desc("Enable symbolic stride memory access versioning"));

126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
129static cl::opt<bool> EnableForwardingConflictDetection(
130    "store-to-load-forwarding-conflict-detection", cl::Hidden,
131    cl::desc("Enable conflict detection in loop-access analysis"),
132    cl::init(true));
133

134static cl::opt<unsigned> MaxForkedSCEVDepth(
135    "max-forked-scev-depth", cl::Hidden,
136    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137    cl::init(5));
138

140 "laa-speculate-unit-stride", cl::Hidden,

141 cl::desc("Speculate that non-constant strides are unit in LAA"),

143

147 "Hoist inner loop runtime memory checks to outer loop if possible"),

150

151bool VectorizerParams::isInterleaveForced() {
152  return ::VectorizationInterleave.getNumOccurrences() > 0;
153}

154

159

160

161

162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);

163 if (!StrideSCEV)

164

165 return OrigSCEV;

166

167

168

169

170

172

177

178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV

179 << " by: " << *Expr << "\n");

180 return Expr;

181}

182

185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),

190 Members.push_back(Index);

191}

192

193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194/// return nullptr.
195static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196                                     ScalarEvolution &SE) {
197  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
198    return nullptr;
199  return SE.getAddExpr(A, B);
200}

202/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203/// return nullptr.
204static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
205                                   ScalarEvolution &SE) {
206  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
207    return nullptr;
208  return SE.getMulExpr(A, B);
209}

217 std::optionalScalarEvolution::LoopGuards &LoopGuards) {

220 if (!StartPtr)

221 return false;

223 bool CheckForNonNull, CheckForFreed;

224 Value *StartPtrV = StartPtr->getValue();

226 DL, CheckForNonNull, CheckForFreed);

227

228 if (DerefBytes && (CheckForNonNull || CheckForFreed))

229 return false;

230

233 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);

234

235

236 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();

237 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {

239 CtxI = LoopPred->getTerminator();

240 }

245 return false;

248 return false;

249 DerefRK = std::max(DerefRK, RK);

250 return true;

251 });

252 if (DerefRK) {

253 DerefBytesSCEV =

255 }

256

257 if (DerefBytesSCEV->isZero())

258 return false;

259

262 return false;

263

266

267

269 return false;

272

273 if (!LoopGuards)

276

277 const SCEV *OffsetAtLastIter =

279 if (!OffsetAtLastIter) {

280

281

284 return false;

286 MaxBTC, WiderTy);

287 OffsetAtLastIter =

289 if (!OffsetAtLastIter)

290 return false;

291 }

292

295 if (!OffsetEndBytes)

296 return false;

297

298 if (IsKnownNonNegative) {

299

300

301

303 if (!EndBytes)

304 return false;

305

306 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);

308 }

309

310

311

312

316}

317

319 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,

321 DenseMap<std::pair<const SCEV *, Type *>,

322 std::pair<const SCEV *, const SCEV *>> *PointerBounds,

324 std::optionalScalarEvolution::LoopGuards &LoopGuards) {

325 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;

328 {{PtrExpr, AccessTy},

330 if (!Ins)

331 return Iter->second;

332 PtrBoundsPair = &Iter->second;

333 }

334

335 const SCEV *ScStart;

336 const SCEV *ScEnd;

337

339 Type *IdxTy = DL.getIndexType(PtrExpr->getType());

342 ScStart = ScEnd = PtrExpr;

344 ScStart = AR->getStart();

346

347

348

349

350 ScEnd = AR->evaluateAtIteration(BTC, *SE);

351 else {

352

353

354

355

356

357

358

360 DT, AC, LoopGuards)) {

361 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);

362 } else {

366 ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));

367 }

368 }

369 const SCEV *Step = AR->getStepRecurrence(*SE);

370

371

372

374 if (CStep->getValue()->isNegative())

376 } else {

377

378

379

380 ScStart = SE->getUMinExpr(ScStart, ScEnd);

381 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);

382 }

383 } else

385

388

389

390 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

391

392 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};

394 *PtrBoundsPair = Res;

395 return Res;

396}

397

398

399

401 Type *AccessTy, bool WritePtr,

402 unsigned DepSetId, unsigned ASId,

404 bool NeedsFreeze) {

408 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),

409 &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);

412 "must be able to compute both start and end expressions");

413 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,

414 NeedsFreeze);

415}

416

417bool RuntimePointerChecking::tryToCreateDiffCheck(

419

420

421

423 return false;

424

427

428

429

432 return false;

433

438

439

440 if (AccSrc.size() != 1 || AccSink.size() != 1)

441 return false;

442

443

444 if (AccSink[0] < AccSrc[0])

446

448 const SCEV *SrcStart;

449 const SCEV *SinkStart;

451 if (match(Src->Expr,

454 match(Sink->Expr,

457 return false;

458

466 return false;

467

469 unsigned AllocSize =

470 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));

471

472

473

474

476 return false;

477

481

482

485

490 return false;

491

492

493

494

495

500 const Loop *StartARLoop = SrcStartAR->getLoop();

501 if (StartARLoop == SinkStartAR->getLoop() &&

503

504

505

506 SrcStartAR->getStepRecurrence(*SE) !=

507 SinkStartAR->getStepRecurrence(*SE)) {

508 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "

509 "cannot be hoisted out of the outer loop\n");

510 return false;

511 }

512 }

513

514 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"

515 << "SrcStart: " << *SrcStartInt << '\n'

516 << "SinkStartInt: " << *SinkStartInt << '\n');

517 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,

518 Src->NeedsFreeze || Sink->NeedsFreeze);

519 return true;

520}

521

523 SmallVector<RuntimePointerCheck, 4> Checks;

524

526 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {

529

531 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);

532 Checks.emplace_back(&CGI, &CGJ);

533 }

534 }

535 }

536 return Checks;

537}
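// --- Illustrative sketch (editorial addition, not from this file): the pair
// enumeration above in isolation. `Group` and `NeedsCheck` are hypothetical
// stand-ins for RuntimeCheckingPtrGroup and the member-overlap test.
#include <cstddef>
#include <utility>
#include <vector>

struct Group {};

std::vector<std::pair<const Group *, const Group *>>
generatePairChecks(const std::vector<Group> &Groups,
                   bool (*NeedsCheck)(const Group &, const Group &)) {
  std::vector<std::pair<const Group *, const Group *>> Checks;
  // Every unordered pair of groups that may overlap gets one runtime check.
  for (size_t I = 0; I < Groups.size(); ++I)
    for (size_t J = I + 1; J < Groups.size(); ++J)
      if (NeedsCheck(Groups[I], Groups[J]))
        Checks.emplace_back(&Groups[I], &Groups[J]);
  return Checks;
}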

538

541 assert(Checks.empty() && "Checks is not empty");

542 groupChecks(DepCands, UseDependencies);

544}

545

548 for (const auto &I : M.Members)

549 for (const auto &J : N.Members)

551 return true;

552 return false;

553}

554

555

556

560 if (!Diff)

561 return nullptr;

562 return Diff->isNegative() ? J : I;

563}
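// Illustrative scalar analogue (editorial sketch, not from this file) of
// getMinFromExprs above: form the difference J - I; if it cannot be computed
// (nullopt here, a null result in the SCEV version), give up; otherwise the
// sign of the difference selects the minimum.
#include <cstdint>
#include <optional>

const int64_t *minByDifference(const int64_t *I, const int64_t *J) {
  std::optional<int64_t> Diff = *J - *I; // SCEV may fail here; plain ints cannot
  if (!Diff) // mirrors the "difference not computable" bail-out
    return nullptr;
  return *Diff < 0 ? J : I;
}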

564

568 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,

569 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),

570 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);

571}

572

574 const SCEV *End, unsigned AS,

578 "all pointers in a checking group must be in the same address space");

579

580

581

582

584 if (!Min0)

585 return false;

586

588 if (!Min1)

589 return false;

590

591

592 if (Min0 == Start)

593 Low = Start;

594

595

596 if (Min1 != End)

598

599 Members.push_back(Index);

601 return true;

602}

603

604void RuntimePointerChecking::groupChecks(

605    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {

650 if (!UseDependencies) {

651 for (unsigned I = 0; I < Pointers.size(); ++I)

653 return;

654 }

655

656 unsigned TotalComparisons = 0;

657

659 for (unsigned Index = 0; Index < Pointers.size(); ++Index)

660 PositionMap[Pointers[Index].PointerValue].push_back(Index);

661

662

663

665

666

667

668

669 for (unsigned I = 0; I < Pointers.size(); ++I) {

670

671

673 continue;

674

677

679

680

681

682

683

684

686 auto PointerI = PositionMap.find(M.getPointer());

687

688

689 if (PointerI == PositionMap.end())

690 continue;

691 for (unsigned Pointer : PointerI->second) {

692 bool Merged = false;

693

694 Seen.insert(Pointer);

695

696

697

699

700

701

702

704 break;

705

706 TotalComparisons++;

707

708 if (Group.addPointer(Pointer, *this)) {

709 Merged = true;

710 break;

711 }

712 }

713

714 if (!Merged)

715

716

717

718 Groups.emplace_back(Pointer, *this);

719 }

720 }

721

722

723

725 }

726}

727

730 unsigned PtrIdx2) {

731 return (PtrToPartition[PtrIdx1] != -1 &&

732 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);

733}

734

738

739

741 return false;

742

743

745 return false;

746

747

749}

750

751

755 for (const auto &[Idx, CG] : enumerate(CheckingGroups))

756 PtrIndices[&CG] = Idx;

757 return PtrIndices;

758}

759

762 unsigned Depth) const {

763 unsigned N = 0;

765 for (const auto &[Check1, Check2] : Checks) {

766 const auto &First = Check1->Members, &Second = Check2->Members;

767 OS.indent(Depth) << "Check " << N++ << ":\n";

768 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)

769 << ":\n";

770 for (unsigned K : First)

772 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)

773 << ":\n";

774 for (unsigned K : Second)

776 }

777}

778

780

781 OS.indent(Depth) << "Run-time memory checks:\n";

783

784 OS.indent(Depth) << "Grouped accesses:\n";

787 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";

788 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High

789 << ")\n";

790 for (unsigned Member : CG.Members) {

792 }

793 }

794}

795

796namespace {

797
798/// Analyses memory accesses in a loop.
799///
800/// Checks whether run-time pointer checks are needed and builds sets for data
801/// dependence checking.

802class AccessAnalysis {

803public:

804

807

812 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),

813 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {

814

815 BAA.enableCrossIterationMode();

816 }

817

818

821 AST.add(adjustLoc(Loc));

822 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);

823 if (IsReadOnly)

824 ReadOnlyPtr.insert(Ptr);

825 }

826

827

828 void addStore(const MemoryLocation &Loc, Type *AccessTy) {

830 AST.add(adjustLoc(Loc));

831 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);

832 }

833

834

835

836

837

838

839

840

841 bool createCheckForAccess(RuntimePointerChecking &RtCheck,

842 MemAccessInfo Access, Type *AccessTy,

843 const DenseMap<Value *, const SCEV *> &Strides,

844 DenseMap<Value *, unsigned> &DepSetId,

845 Loop *TheLoop, unsigned &RunningDepId,

846 unsigned ASId, bool Assume);

847

848

849

850

851

852

853

854

855

856 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,

857 const DenseMap<Value *, const SCEV *> &Strides,

858 Value *&UncomputablePtr, bool AllowPartial);

859

860

861

862 void buildDependenceSets() {

863 processMemAccesses();

864 }

865

866

867

868

869

870

871 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }

872

873

874 void resetDepChecks(MemoryDepChecker &DepChecker) {

875 CheckDeps.clear();

877 }

878

879 const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }

880

881private:

882 typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

883

884

885

886 MemoryLocation adjustLoc(MemoryLocation Loc) const {

887

888

892 return Loc;

893 }

894

895

896 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {

897 if (!ScopeList)

898 return nullptr;

899

900

901

903 return LoopAliasScopes.contains(cast<MDNode>(Scope));

904 }))

905 return nullptr;

906

907 return ScopeList;

908 }

909

910

911

912 void processMemAccesses();

913

914

915

917

918

919 const Loop *TheLoop;

920

921

922 MemAccessInfoList CheckDeps;

923

924

925 SmallPtrSet<Value*, 16> ReadOnlyPtr;

926

927

928 BatchAAResults BAA;

929

930

931

932 AliasSetTracker AST;

933

934

935 const LoopInfo *LI;

936

937

938 DominatorTree &DT;

939

940

941

942

944

945

946

947

948

949

950

951

952 bool IsRTCheckAnalysisNeeded = false;

953

954

955 PredicatedScalarEvolution &PSE;

956

957 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;

958

959

960

961 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;

962};

963

964}

965

966

967

968static std::optional<int64_t>

972 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy

973 << "\n");

974 return std::nullopt;

975 }

976

977

978 if (Lp != AR->getLoop()) {

980 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";

981 if (Ptr)

982 dbgs() << *Ptr << " ";

983

984 dbgs() << "SCEV: " << *AR << "\n";

985 });

986 return std::nullopt;

987 }

988

989

991

992

993 const APInt *APStepVal;

996 dbgs() << "LAA: Bad stride - Not a constant strided ";

997 if (Ptr)

998 dbgs() << *Ptr << " ";

999 dbgs() << "SCEV: " << *AR << "\n";

1000 });

1001 return std::nullopt;

1002 }

1003

1005 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);

1007

1008

1009 std::optional<int64_t> StepVal = APStepVal->trySExtValue();

1010 if (!StepVal)

1011 return std::nullopt;

1012

1013

1014 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);

1015}
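// Worked illustration (editorial sketch, not from this file) of the final
// step above: a byte step becomes a stride in elements only when it divides
// evenly. A step of -8 bytes over a 4-byte element gives stride -2; a 6-byte
// step over a 4-byte element yields nullopt (no constant element stride).
#include <cstdint>
#include <optional>

std::optional<int64_t> strideInElements(int64_t StepBytes, int64_t EltSizeBytes) {
  if (StepBytes % EltSizeBytes)
    return std::nullopt; // the step is not a whole number of elements
  return StepBytes / EltSizeBytes;
}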

1016

1017

1018

1020 Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,

1022 std::optional<int64_t> Stride = std::nullopt) {

1023

1025 return true;

1026

1028 return true;

1029

1030

1031

1032

1033

1034

1036 GEP && GEP->hasNoUnsignedSignedWrap()) {

1037

1038

1039 if (L->getHeader() == L->getLoopLatch() ||

1041 if (getLoadStorePointerOperand(U) != GEP)

1042 return false;

1043 BasicBlock *UserBB = cast<Instruction>(U)->getParent();

1044 return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);

1045 }))

1046 return true;

1047 }

1048

1049 if (!Stride)

1051 if (Stride) {

1052

1053

1054

1057 (Stride == 1 || Stride == -1))

1058 return true;

1059 }

1060

1061 if (Ptr && Assume) {

1064 << "LAA: Pointer: " << *Ptr << "\n"

1065 << "LAA: SCEV: " << *AR << "\n"

1066 << "LAA: Added an overflow assumption\n");

1067 return true;

1068 }

1069

1070 return false;

1071}

1072

1078

1079 while (!WorkList.empty()) {

1081 if (!Visited.insert(Ptr).second)

1082 continue;

1084

1085

1086

1087 if (PN && InnermostLoop.contains(PN->getParent()) &&

1088 PN->getParent() != InnermostLoop.getHeader()) {

1090 } else

1091 AddPointer(Ptr);

1092 }

1093}

1094

1112static void findForkedSCEVs(
1113    ScalarEvolution *SE, const Loop *L, Value *Ptr,
1114    SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,

1115 unsigned Depth) {

1116

1117

1118

1119

1124 return;

1125 }

1126

1128

1131 };

1132

1133 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {

1134 switch (Opcode) {

1135 case Instruction::Add:

1137 case Instruction::Sub:

1139 default:

1140 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");

1141 }

1142 };

1143

1145 unsigned Opcode = I->getOpcode();

1146 switch (Opcode) {

1147 case Instruction::GetElementPtr: {

1149 Type *SourceTy = GEP->getSourceElementType();

1150

1151

1152 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {

1154 break;

1155 }

1160

1161

1162 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||

1163 any_of(OffsetScevs, UndefPoisonCheck);

1164

1165

1166

1167

1168 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)

1169 BaseScevs.push_back(BaseScevs[0]);

1170 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)

1171 OffsetScevs.push_back(OffsetScevs[0]);

1172 else {

1173 ScevList.emplace_back(Scev, NeedsFreeze);

1174 break;

1175 }

1176

1178

1179

1180

1181

1183

1184 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {

1187

1188

1192 }

1193 break;

1194 }

1195 case Instruction::Select: {

1197

1198

1199

1202 if (ChildScevs.size() == 2)

1204 else

1206 break;

1207 }

1208 case Instruction::PHI: {

1210

1211

1212

1213 if (I->getNumOperands() == 2) {

1216 }

1217 if (ChildScevs.size() == 2)

1219 else

1221 break;

1222 }

1223 case Instruction::Add:

1224 case Instruction::Sub: {

1229

1230

1231 bool NeedsFreeze =

1232 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);

1233

1234

1235

1236

1237 if (LScevs.size() == 2 && RScevs.size() == 1)

1239 else if (RScevs.size() == 2 && LScevs.size() == 1)

1241 else {

1242 ScevList.emplace_back(Scev, NeedsFreeze);

1243 break;

1244 }

1245

1246 for (auto [L, R] : zip(LScevs, RScevs))

1247 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),

1248 NeedsFreeze);

1249 break;

1250 }

1251 default:

1252

1253 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");

1255 break;

1256 }

1257}

1258

1259bool AccessAnalysis::createCheckForAccess(

1263 unsigned &RunningDepId, unsigned ASId, bool Assume) {

1267

1271 "Must have some runtime-check pointer candidates");

1272

1273

1274

1275 auto IsLoopInvariantOrAR =

1279 };

1280 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {

1281 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";

1282 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()

1283 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");

1284 } else {

1286 }

1287

1288

1289

1290 for (auto &P : RTCheckPtrs) {

1291

1293 continue;

1294

1296 if (!AR && Assume)

1299 return false;

1300

1301

1302

1303 if (RTCheckPtrs.size() == 1) {

1304 AR =

1306 P.setPointer(AR);

1307 }

1308

1309 if (isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,

1310 TheLoop, Assume, DT))

1311 return false;

1312 }

1313

1314 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {

1315

1316 unsigned DepId;

1317

1318 if (isDependencyCheckNeeded()) {

1320 unsigned &LeaderId = DepSetId[Leader];

1321 if (!LeaderId)

1322 LeaderId = RunningDepId++;

1323 DepId = LeaderId;

1324 } else

1325

1326 DepId = RunningDepId++;

1327

1328 bool IsWrite = Access.getInt();

1329 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,

1330 NeedsFreeze);

1331 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

1332 }

1333

1334 return true;

1335}

1336

1337bool AccessAnalysis::canCheckPtrAtRT(

1340 bool AllowPartial) {

1341

1342

1343 bool CanDoRT = true;

1344

1345 bool MayNeedRTCheck = false;

1346 if (!IsRTCheckAnalysisNeeded) return true;

1347

1348 bool IsDepCheckNeeded = isDependencyCheckNeeded();

1349

1350

1351

1352 unsigned ASId = 0;

1353 for (const auto &AS : AST) {

1354 int NumReadPtrChecks = 0;

1355 int NumWritePtrChecks = 0;

1356 bool CanDoAliasSetRT = true;

1357 ++ASId;

1358 auto ASPointers = AS.getPointers();

1359

1360

1361

1362 unsigned RunningDepId = 1;

1364

1366

1367

1368

1370 for (const Value *ConstPtr : ASPointers) {

1371 Value *Ptr = const_cast<Value *>(ConstPtr);

1372 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));

1373 if (IsWrite)

1374 ++NumWritePtrChecks;

1375 else

1376 ++NumReadPtrChecks;

1378 }

1379

1380

1381

1382 if (NumWritePtrChecks == 0 ||

1383 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {

1384 assert((ASPointers.size() <= 1 ||

1386 [this](const Value *Ptr) {

1387 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),

1388 true);

1389 return !DepCands.contains(AccessWrite);

1390 })) &&

1391 "Can only skip updating CanDoRT below, if all entries in AS "

1392 "are reads or there is at most 1 entry");

1393 continue;

1394 }

1395

1396 for (auto &Access : AccessInfos) {

1398 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,

1399 DepSetId, TheLoop, RunningDepId, ASId,

1400 false)) {

1401 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"

1402 << *Access.getPointer() << '\n');

1404 CanDoAliasSetRT = false;

1405 }

1406 }

1407 }


1418 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

1419

1420

1421

1422 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {

1423

1424

1425

1426 CanDoAliasSetRT = true;

1427 for (const auto &[Access, AccessTy] : Retries) {

1428 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,

1429 DepSetId, TheLoop, RunningDepId, ASId,

1430 true)) {

1431 CanDoAliasSetRT = false;

1432 UncomputablePtr = Access.getPointer();

1433 if (!AllowPartial)

1434 break;

1435 }

1436 }

1437 }

1438

1439 CanDoRT &= CanDoAliasSetRT;

1440 MayNeedRTCheck |= NeedsAliasSetRTCheck;

1441 ++ASId;

1442 }

1443

1444

1445

1446

1447

1448

1449 unsigned NumPointers = RtCheck.Pointers.size();

1450 for (unsigned i = 0; i < NumPointers; ++i) {

1451 for (unsigned j = i + 1; j < NumPointers; ++j) {

1452

1453 if (RtCheck.Pointers[i].DependencySetId ==

1454 RtCheck.Pointers[j].DependencySetId)

1455 continue;

1456

1457 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)

1458 continue;

1459

1462

1465 if (ASi != ASj) {

1467 dbgs() << "LAA: Runtime check would require comparison between"

1468 " different address spaces\n");

1469 return false;

1470 }

1471 }

1472 }

1473

1474 if (MayNeedRTCheck && (CanDoRT || AllowPartial))

1476

1478 << " pointer comparisons.\n");

1479

1480

1481

1482

1484

1485 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;

1486 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&

1487 "CanDoRTIfNeeded depends on RtCheck.Need");

1488 if (!CanDoRTIfNeeded && !AllowPartial)

1489 RtCheck.reset();

1490 return CanDoRTIfNeeded;

1491}

1492

1493void AccessAnalysis::processMemAccesses() {

1494

1495

1496

1497

1498 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");

1503 dbgs() << "\t" << *A.getPointer() << " ("

1504 << (A.getInt()

1505 ? "write"

1506 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"

1507 : "read"))

1508 << ")\n";

1509 });

1510

1511

1512

1513

1514

1515 for (const auto &AS : AST) {

1516

1517

1518

1519 auto ASPointers = AS.getPointers();

1520

1521 bool SetHasWrite = false;

1522

1523

1524

1526 UnderlyingObjToAccessMap;

1527 UnderlyingObjToAccessMap ObjToLastAccess;

1528

1529

1530 PtrAccessMap DeferredAccesses;

1531

1532

1533

1534 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {

1535 bool UseDeferred = SetIteration > 0;

1536 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

1537

1538 for (const Value *ConstPtr : ASPointers) {

1539 Value *Ptr = const_cast<Value *>(ConstPtr);

1540

1541

1542

1543 for (const auto &[AC, _] : S) {

1544 if (AC.getPointer() != Ptr)

1545 continue;

1546

1547 bool IsWrite = AC.getInt();

1548

1549

1550

1551 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;

1552 if (UseDeferred && !IsReadOnlyPtr)

1553 continue;

1554

1555

1556 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||

1557 S.contains(MemAccessInfo(Ptr, false))) &&

1558 "Alias-set pointer not in the access set?");

1559

1560 MemAccessInfo Access(Ptr, IsWrite);

1562

1563

1564

1565

1566

1567

1568 if (!UseDeferred && IsReadOnlyPtr) {

1569

1570

1571 DeferredAccesses.insert({Access, {}});

1572 continue;

1573 }

1574

1575

1576

1577

1578

1579 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {

1580 CheckDeps.push_back(Access);

1581 IsRTCheckAnalysisNeeded = true;

1582 }

1583

1584 if (IsWrite)

1585 SetHasWrite = true;

1586

1587

1588

1590 UOs = {};

1593 << "Underlying objects for pointer " << *Ptr << "\n");

1594 for (const Value *UnderlyingObj : UOs) {

1595

1596

1601 continue;

1602

1603 auto [It, Inserted] = ObjToLastAccess.try_emplace(

1604 {UnderlyingObj,

1607 if (!Inserted) {

1609 It->second = Access;

1610 }

1611

1612 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");

1613 }

1614 }

1615 }

1616 }

1617 }

1618}

1619

1620

1621std::optional<int64_t>

1625 bool Assume, bool ShouldCheckWrap) {

1628 return 0;

1629

1631

1633 if (Assume && !AR)

1635

1636 if (!AR) {

1637 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr

1638 << " SCEV: " << *PtrScev << "\n");

1639 return std::nullopt;

1640 }

1641

1642 std::optional<int64_t> Stride =

1644 if (!ShouldCheckWrap || !Stride)

1645 return Stride;

1646

1647 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))

1648 return Stride;

1649

1651 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "

1652 << *Ptr << " SCEV: " << *AR << "\n");

1653 return std::nullopt;

1654}

1655

1660 bool StrictCheck, bool CheckType) {

1661 assert(PtrA && PtrB && "Expected non-nullptr pointers.");

1662

1663

1664 if (PtrA == PtrB)

1665 return 0;

1666

1667

1668 if (CheckType && ElemTyA != ElemTyB)

1669 return std::nullopt;

1670

1673

1674

1675 if (ASA != ASB)

1676 return std::nullopt;

1677 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

1678

1679 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);

1681 DL, OffsetA, true);

1683 DL, OffsetB, true);

1684

1685 std::optional<int64_t> Val;

1686 if (PtrA1 == PtrB1) {

1687

1688

1691

1692 if (ASA != ASB)

1693 return std::nullopt;

1694

1695 IdxWidth = DL.getIndexSizeInBits(ASA);

1696 OffsetA = OffsetA.sextOrTrunc(IdxWidth);

1697 OffsetB = OffsetB.sextOrTrunc(IdxWidth);

1698

1699 OffsetB -= OffsetA;

1701 } else {

1702

1703 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);

1704 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);

1705 std::optional<APInt> Diff =

1707 if (!Diff)

1708 return std::nullopt;

1709 Val = Diff->trySExtValue();

1710 }

1711

1712 if (!Val)

1713 return std::nullopt;

1714

1715 int64_t Size = DL.getTypeStoreSize(ElemTyA);

1716 int64_t Dist = *Val / Size;

1717

1718

1719

1720 if (!StrictCheck || Dist * Size == Val)

1721 return Dist;

1722 return std::nullopt;

1723}
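// Minimal model (hypothetical helper, not LLVM API) of the tail of
// getPointersDiff above: a known byte distance Val becomes an element
// distance Val / Size; under StrictCheck the division must be exact, which
// rejects accesses that are not a whole number of elements apart.
#include <cstdint>
#include <optional>

std::optional<int64_t> elementDistance(int64_t ValBytes, int64_t StoreSizeBytes,
                                       bool StrictCheck) {
  int64_t Dist = ValBytes / StoreSizeBytes;
  if (!StrictCheck || Dist * StoreSizeBytes == ValBytes)
    return Dist;
  return std::nullopt;
}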

1724

1729 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&

1730 "Expected list of pointer operands.");

1731

1732

1733 Value *Ptr0 = VL[0];

1734

1735 using DistOrdPair = std::pair<int64_t, unsigned>;

1737 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);

1738 Offsets.emplace(0, 0);

1739 bool IsConsecutive = true;

1741 std::optional<int64_t> Diff =

1743 true);

1744 if (!Diff)

1745 return false;

1746

1747

1748 int64_t Offset = *Diff;

1749 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);

1750 if (!IsInserted)

1751 return false;

1752

1753 IsConsecutive &= std::next(It) == Offsets.end();

1754 }

1755 SortedIndices.clear();

1756 if (!IsConsecutive) {

1757

1759 for (auto [Idx, Off] : enumerate(Offsets))

1760 SortedIndices[Idx] = Off.second;

1761 }

1762 return true;

1763}
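// Sketch of the sorting idea above (editorial illustration; assumes the byte
// offsets from a common base are already computed, which getPointersDiff
// provides in the original): insert (offset, index) pairs into an ordered
// set; the list is already consecutive iff every insertion lands at the end.
#include <cstdint>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

bool sortByOffset(const std::vector<int64_t> &Offsets,
                  std::vector<unsigned> &SortedIndices) {
  std::set<std::pair<int64_t, unsigned>> Sorted;
  bool IsConsecutive = true;
  for (unsigned Idx = 0; Idx < Offsets.size(); ++Idx) {
    auto [It, Inserted] = Sorted.emplace(Offsets[Idx], Idx);
    if (!Inserted) // duplicate offset: bail out, as the original does
      return false;
    IsConsecutive &= std::next(It) == Sorted.end();
  }
  SortedIndices.clear();
  if (!IsConsecutive)
    for (auto [Off, Idx] : Sorted)
      SortedIndices.push_back(Idx);
  return true;
}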

1764

1765

1770 if (!PtrA || !PtrB)

1771 return false;

1774 std::optional<int64_t> Diff =

1776 true, CheckType);

1777 return Diff == 1;

1778}

1779

1782 [this, SI](Value *Ptr) {

1783 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);

1784 InstMap.push_back(SI);

1785 ++AccessIdx;

1786 });

1787}

1788

1791 [this, LI](Value *Ptr) {

1792 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);

1793 InstMap.push_back(LI);

1794 ++AccessIdx;

1795 });

1796}

1797

1800 switch (Type) {

1805

1813 }

1815}

1816

1818 switch (Type) {

1824 return false;

1825

1829 return true;

1830 }

1832}

1833

1837

1839 switch (Type) {

1842 return true;

1843

1850 return false;

1851 }

1853}

1854

1855bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,

1857 unsigned CommonStride) {

1858

1859

1860

1861

1862

1863

1864

1865

1866

1867

1868

1869

1870 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;

1871

1872 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =

1874 MaxStoreLoadForwardSafeDistanceInBits);

1875

1876

1877 for (uint64_t VF = 2 * TypeByteSize;

1878 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {

1879

1880

1881 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {

1882 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);

1883 break;

1884 }

1885 }

1886

1887 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {

1889 dbgs() << "LAA: Distance " << Distance

1890 << " that could cause a store-load forwarding conflict\n");

1891 return true;

1892 }

1893

1894 if (CommonStride &&

1895 MaxVFWithoutSLForwardIssuesPowerOf2 <

1896 MaxStoreLoadForwardSafeDistanceInBits &&

1897 MaxVFWithoutSLForwardIssuesPowerOf2 !=

1899 uint64_t MaxVF =

1900 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);

1901 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

1902 MaxStoreLoadForwardSafeDistanceInBits =

1903 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);

1904 }

1905 return false;

1906}
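// Standalone model (editorial sketch, not the original code) of the
// heuristic above: a store whose value is re-loaded Distance bytes later
// defeats store-to-load forwarding for any vector width VF that does not
// divide Distance, as long as the conflict recurs within roughly
// 8 * TypeByteSize iterations; the largest safe power-of-two VF is capped
// at half of the first offending width.
#include <cstdint>

uint64_t maxSafeVFBytes(uint64_t Distance, uint64_t TypeByteSize,
                        uint64_t UpperBoundBytes) {
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t Safe = UpperBoundBytes;
  for (uint64_t VF = 2 * TypeByteSize; VF <= UpperBoundBytes; VF *= 2) {
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      Safe = VF >> 1;
      break;
    }
  }
  return Safe; // a result below 2 * TypeByteSize signals a forwarding conflict
}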

1907

1909 if (Status < S)

1910 Status = S;

1911}

1912
1913/// Given a dependence-distance \p Dist between two memory accesses, that have
1914/// strides in the same direction whose absolute value of the maximum stride is
1915/// given in \p MaxStride, and a maximum backedge-taken count \p MaxBTC, check
1916/// if it is possible to prove statically that the dependence distance is
1917/// larger than the range that the accesses will travel through the execution
1918/// of the loop.
1925static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1926                                     const SCEV &MaxBTC, const SCEV &Dist,
1927                                     uint64_t MaxStride) {


1948

1949 const SCEV *CastedDist = &Dist;

1950 const SCEV *CastedProduct = Product;

1951 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());

1952 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());

1953

1954

1955

1956

1957 if (DistTypeSizeBits > ProductTypeSizeBits)

1959 else

1961

1962

1963

1966 return true;

1967

1968

1969

1973}
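// Numeric illustration (editorial sketch) of the inequality established
// above: with MaxStride bytes advanced per iteration and at most MaxBTC
// backedges, the loop sweeps (MaxBTC + 1) * MaxStride bytes, so a distance
// at least that large rules out overlap. E.g. MaxBTC = 255, MaxStride = 4:
// (255 + 1) * 4 = 1024, so |Dist| = 4000 is safe.
#include <cstdint>
#include <cstdlib>

bool safeDependenceDistanceModel(int64_t Dist, uint64_t MaxStride,
                                 uint64_t MaxBTC) {
  return static_cast<uint64_t>(std::llabs(Dist)) >= (MaxBTC + 1) * MaxStride;
}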

1975/// Check the dependence for two accesses with the same stride \p Stride.
1980static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1981                                          uint64_t TypeByteSize) {

1982 assert(Stride > 1 && "The stride must be greater than 1");

1983 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");

1984 assert(Distance > 0 && "The distance must be non-zero");

1985

1986

1987 if (Distance % TypeByteSize)

1988 return false;


2006 return Distance % Stride;

2007}
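// Worked example (editorial sketch) for the check above, with Distance and
// Stride both in bytes as in the surviving code: element size 4, common
// stride 12 (three elements). Distance 4 or 8 lands between the strided
// footprints (4 % 12 != 0), so the accesses are independent; Distance 24 is
// a multiple of the stride and hits the same lanes again.
#include <cstdint>

bool stridedAccessesIndependentModel(uint64_t Distance, uint64_t Stride,
                                     uint64_t TypeByteSize) {
  if (Distance % TypeByteSize) // sizes straddle: cannot prove independence
    return false;
  return Distance % Stride != 0;
}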

2008

2009bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,

2010 Type *SrcTy,

2011 const SCEV *Sink,

2012 Type *SinkTy) {

2013 const SCEV *BTC = PSE.getBackedgeTakenCount();

2014 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();

2015 ScalarEvolution &SE = *PSE.getSE();

2016 const auto &[SrcStart_, SrcEnd_] =

2018 &SE, &PointerBounds, DT, AC, LoopGuards);

2020 return false;

2021

2022 const auto &[SinkStart_, SinkEnd_] =

2024 &SE, &PointerBounds, DT, AC, LoopGuards);

2027 return false;

2028

2029 if (!LoopGuards)

2031

2033 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);

2035 return true;

2036

2037 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);

2038 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);

2040}

2041
2042std::variant<MemoryDepChecker::Dependence::DepType,
2043             MemoryDepChecker::DepDistanceStrideAndSizeInfo>

2044MemoryDepChecker::getDependenceDistanceStrideAndSize(

2045 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,

2046 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {

2047 const auto &DL = InnermostLoop->getHeader()->getDataLayout();

2048 auto &SE = *PSE.getSE();

2049 const auto &[APtr, AIsWrite] = A;

2050 const auto &[BPtr, BIsWrite] = B;

2051

2052

2053 if (!AIsWrite && !BIsWrite)

2055

2058

2059

2060 if (APtr->getType()->getPointerAddressSpace() !=

2061 BPtr->getType()->getPointerAddressSpace())

2063

2064 std::optional<int64_t> StrideAPtr = getPtrStride(

2065 PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);

2066 std::optional<int64_t> StrideBPtr = getPtrStride(

2067 PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);

2068

2069 const SCEV *Src = PSE.getSCEV(APtr);

2070 const SCEV *Sink = PSE.getSCEV(BPtr);

2071

2072

2073

2074

2075 if (StrideAPtr && *StrideAPtr < 0) {

2079 std::swap(StrideAPtr, StrideBPtr);

2080 }

2081

2082 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

2083

2084 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink

2085 << "\n");

2086 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst

2087 << ": " << *Dist << "\n");

2088

2089

2090

2091

2092

2093

2094

2095

2096 if (!StrideAPtr || !StrideBPtr) {

2097 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");

2099 }

2100

2101 int64_t StrideAPtrInt = *StrideAPtr;

2102 int64_t StrideBPtrInt = *StrideBPtr;

2103 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt

2104 << " Sink induction step: " << StrideBPtrInt << "\n");

2105

2106

2107 if (!StrideAPtrInt || !StrideBPtrInt)

2109

2110

2111

2112 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {

2114 dbgs() << "Pointer access with strides in different directions\n");

2116 }

2117

2118 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);

2119 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);

2120

2121

2122

2123 uint64_t ASz = DL.getTypeAllocSize(ATy);

2124 uint64_t BSz = DL.getTypeAllocSize(BTy);

2125 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;

2126

2127 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;

2128 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;

2129

2130 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);

2131

2132 std::optional<uint64_t> CommonStride;

2133 if (StrideAScaled == StrideBScaled)

2134 CommonStride = StrideAScaled;

2135

2136

2137

2139 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;

2140

2141

2143 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");

2145 }

2146

2147 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,

2148 TypeByteSize, AIsWrite, BIsWrite);

2149}

2150

2152MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,

2154 assert(AIdx < BIdx && "Must pass arguments in program order");

2155

2156

2157

2158

2159 auto CheckCompletelyBeforeOrAfter = [&]() {

2160 auto *APtr = A.getPointer();

2161 auto *BPtr = B.getPointer();

2164 const SCEV *Src = PSE.getSCEV(APtr);

2165 const SCEV *Sink = PSE.getSCEV(BPtr);

2166 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);

2167 };

2168

2169

2170

2171 auto Res =

2172 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);

2173 if (std::holds_alternative<Dependence::DepType>(Res)) {

2175 CheckCompletelyBeforeOrAfter())

2177 return std::get<Dependence::DepType>(Res);

2178 }

2179

2180 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =

2181 std::get<DepDistanceStrideAndSizeInfo>(Res);

2182 bool HasSameSize = TypeByteSize > 0;

2183

2184 ScalarEvolution &SE = *PSE.getSE();

2185 auto &DL = InnermostLoop->getHeader()->getDataLayout();

2186

2187

2188

2189

2190

2191

2192 if (HasSameSize &&

2194 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))

2196

2197

2198

2199 const APInt *APDist = nullptr;

2200 uint64_t ConstDist =

2202

2203

2204 if (APDist) {

2205

2206

2207 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&

2209 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");

2211 }

2212 } else {

2213 if (!LoopGuards)

2214 LoopGuards.emplace(

2217 }

2218

2219

2222 if (HasSameSize) {

2223

2225 }

2226 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "

2227 "different type sizes\n");

2229 }

2230

2231 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);

2232

2233

2234

2235

2236

2237

2238

2239

2241 if (!ConstDist) {

2244 }

2245 if (!HasSameSize ||

2246 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {

2248 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");

2250 }

2251 }

2252

2253 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");

2255 }

2256

2258

2259 if (MinDistance <= 0) {

2262 }

2263

2264 if (!HasSameSize) {

2265 if (CheckCompletelyBeforeOrAfter())

2267 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "

2268 "different type sizes\n");

2270 }


2312 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;

2313 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {

2314 if (!ConstDist) {

2315

2316

2317

2318

2321 }

2322 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "

2323 << MinDistance << '\n');

2325 }

2326

2327

2328

2329 if (MinDistanceNeeded > MinDepDistBytes) {

2330 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "

2331 << MinDistanceNeeded << " size in bytes\n");

2333 }

2334

2335 MinDepDistBytes =

2336 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);

2337

2338 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);

2340 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))

2342

2343 uint64_t MaxVF = MinDepDistBytes / MaxStride;

2344 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance

2345 << " with max VF = " << MaxVF << '\n');

2346

2347 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

2348 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {

2349

2350

2351

2352

2355 }

2356

2357 if (CheckCompletelyBeforeOrAfter())

2359

2360 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);

2362}

2363

2366

2367 MinDepDistBytes = -1;

2370 if (Visited.contains(CurAccess))

2371 continue;

2372

2373

2378

2379

2380 while (AI != AE) {

2381 Visited.insert(*AI);

2382 bool AIIsWrite = AI->getInt();

2383

2384

2386 (AIIsWrite ? AI : std::next(AI));

2387 while (OI != AE) {

2388

2389 auto &Acc = Accesses[*AI];

2390 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();

2391 I1 != I1E; ++I1)

2392

2393

2394 for (std::vector<unsigned>::iterator

2395 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),

2396 I2E = (OI == AI ? I1E : Accesses[*OI].end());

2397 I2 != I2E; ++I2) {

2398 auto A = std::make_pair(&*AI, *I1);

2399 auto B = std::make_pair(&*OI, *I2);

2400

2402 if (*I1 > *I2)

2404

2406 isDependent(*A.first, A.second, *B.first, B.second);

2408

2409

2410

2411

2412

2413 if (RecordDependences) {

2415 Dependences.emplace_back(A.second, B.second, Type);

2416

2418 RecordDependences = false;

2419 Dependences.clear();

2421 << "Too many dependences, stopped recording\n");

2422 }

2423 }

2425 return false;

2426 }

2427 ++OI;

2428 }

2429 ++AI;

2430 }

2431 }

2432

2433 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");

2435}

2436

2440 auto I = Accesses.find(Access);

2442 if (I != Accesses.end()) {

2443 transform(I->second, std::back_inserter(Insts),

2444 [&](unsigned Idx) { return this->InstMap[Idx]; });

2445 }

2446

2447 return Insts;

2448}

2449

2451 "NoDep",

2452 "Unknown",

2453 "IndirectUnsafe",

2454 "Forward",

2455 "ForwardButPreventsForwarding",

2456 "Backward",

2457 "BackwardVectorizable",

2458 "BackwardVectorizableButPreventsForwarding"};

2459

2467

2468bool LoopAccessInfo::canAnalyzeLoop() {

2469

2472 << TheLoop->getLocStr() << "\n");

2473

2474

2476 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");

2477 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";

2478 return false;

2479 }

2480

2481

2484 dbgs() << "LAA: loop control flow is not understood by analyzer\n");

2485 recordAnalysis("CFGNotUnderstood")

2486 << "loop control flow is not understood by analyzer";

2487 return false;

2488 }

2489

2490

2491

2492

2495 recordAnalysis("CantComputeNumberOfIterations")

2496 << "could not determine number of loop iterations";

2497 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");

2498 return false;

2499 }

2500

2501 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "

2503 return true;

2504}

2505

2506bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,

2507 const TargetLibraryInfo *TLI,

2508 DominatorTree *DT) {

2509

2512 SmallPtrSet<MDNode *, 8> LoopAliasScopes;

2513

2514

2515 unsigned NumReads = 0;

2516 unsigned NumReadWrites = 0;

2517

2518 bool HasComplexMemInst = false;

2519

2520

2521 HasConvergentOp = false;

2522

2523 PtrRtChecking->Pointers.clear();

2524 PtrRtChecking->Need = false;

2525

2527

2528 const bool EnableMemAccessVersioningOfLoop =

2531

2532

2533

2534 LoopBlocksRPO RPOT(TheLoop);

2535 RPOT.perform(LI);

2536 for (BasicBlock *BB : RPOT) {

2537

2538

2539 for (Instruction &I : *BB) {

2542 HasConvergentOp = true;

2543 }

2544

2545

2546

2547 if (HasComplexMemInst && HasConvergentOp)

2548 return false;

2549

2550

2551 if (HasComplexMemInst)

2552 continue;

2553

2554

2556 for (Metadata *Op : Decl->getScopeList()->operands())

2558

2559

2560

2561

2564 continue;

2565

2566

2567

2568

2569 if (I.mayReadFromMemory()) {

2570 auto hasPointerArgs = [](CallBase *CB) {

2571 return any_of(CB->args(), [](Value const *Arg) {

2572 return Arg->getType()->isPointerTy();

2573 });

2574 };

2575

2576

2577

2578

2581 continue;

2582

2584 if (!Ld) {

2585 recordAnalysis("CantVectorizeInstruction", Ld)

2586 << "instruction cannot be vectorized";

2587 HasComplexMemInst = true;

2588 continue;

2589 }

2590 if (!Ld->isSimple() && !IsAnnotatedParallel) {

2591 recordAnalysis("NonSimpleLoad", Ld)

2592 << "read with atomic ordering or volatile read";

2593 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");

2594 HasComplexMemInst = true;

2595 continue;

2596 }

2597 NumLoads++;

2600 if (EnableMemAccessVersioningOfLoop)

2601 collectStridedAccess(Ld);

2602 continue;

2603 }

2604

2605

2606 if (I.mayWriteToMemory()) {

2608 if (!St) {

2609 recordAnalysis("CantVectorizeInstruction", St)

2610 << "instruction cannot be vectorized";

2611 HasComplexMemInst = true;

2612 continue;

2613 }

2614 if (!St->isSimple() && !IsAnnotatedParallel) {

2615 recordAnalysis("NonSimpleStore", St)

2616 << "write with atomic ordering or volatile write";

2617 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");

2618 HasComplexMemInst = true;

2619 continue;

2620 }

2621 NumStores++;

2624 if (EnableMemAccessVersioningOfLoop)

2625 collectStridedAccess(St);

2626 }

2627 }

2628 }

2629

2630 if (HasComplexMemInst)

2631 return false;

2632

2633

2634

2635

2636

2637

2638 if (!Stores.size()) {

2639 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");

2640 return true;

2641 }

2642

2644 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,

2645 LoopAliasScopes);

2646

2647

2648

2649

2650

2651

2652 SmallSet<std::pair<Value *, Type *>, 16> Seen;

2653

2654

2655

2656 SmallPtrSet<Value *, 16> UniformStores;

2657

2658 for (StoreInst *ST : Stores) {

2659 Value *Ptr = ST->getPointerOperand();

2660

2661 if (isInvariant(Ptr)) {

2662

2663 StoresToInvariantAddresses.push_back(ST);

2664 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=

2665 !UniformStores.insert(Ptr).second;

2666 }

2667

2668

2669

2671 if (Seen.insert({Ptr, AccessTy}).second) {

2672 ++NumReadWrites;

2673

2675

2676

2677

2678 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))

2680

2682 [&Accesses, AccessTy, Loc](Value *Ptr) {

2683 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);

2684 Accesses.addStore(NewLoc, AccessTy);

2685 });

2686 }

2687 }

2688

2689 if (IsAnnotatedParallel) {

2691 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "

2692 << "checks.\n");

2693 return true;

2694 }

2695

2696 for (LoadInst *LD : Loads) {

2697 Value *Ptr = LD->getPointerOperand();

2698

2699

2700

2701

2702

2703

2704

2705

2706 bool IsReadOnlyPtr = false;

2708 if (Seen.insert({Ptr, AccessTy}).second ||

2709 getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,

2710 true)) {

2711 ++NumReads;

2712 IsReadOnlyPtr = true;

2713 }

2714

2715

2716

2717 if (UniformStores.contains(Ptr)) {

2718 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "

2719 "load and uniform store to the same address!\n");

2720 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;

2721 }

2722

2724

2725

2726

2727 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))

2729

2731 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {

2732 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);

2733 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);

2734 });

2735 }

2736

2737

2738

2739 if (NumReadWrites == 1 && NumReads == 0) {

2740 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");

2741 return true;

2742 }

2743

2744

2745

2746 Accesses.buildDependenceSets();

2747

2748

2749

2750 Value *UncomputablePtr = nullptr;

2751 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(

2752 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);

2753 if (!HasCompletePtrRtChecking) {

2755 recordAnalysis("CantIdentifyArrayBounds", I)

2756 << "cannot identify array bounds";

2757 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "

2758 << "the array bounds.\n");

2759 return false;

2760 }

2761

2763 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");

2764

2765 bool DepsAreSafe = true;

2766 if (Accesses.isDependencyCheckNeeded()) {

2767 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");

2768 DepsAreSafe =

2769 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());

2770

2772 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

2773

2774

2775 Accesses.resetDepChecks(*DepChecker);

2776

2777 PtrRtChecking->reset();

2778 PtrRtChecking->Need = true;

2779

2780 UncomputablePtr = nullptr;

2781 HasCompletePtrRtChecking =

2782 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,

2783 UncomputablePtr, AllowPartial);

2784

2785

2786 if (!HasCompletePtrRtChecking) {

2788 recordAnalysis("CantCheckMemDepsAtRunTime", I)

2789 << "cannot check memory dependencies at runtime";

2790 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");

2791 return false;

2792 }

2793 DepsAreSafe = true;

2794 }

2795 }

2796

2797 if (HasConvergentOp) {

2798 recordAnalysis("CantInsertRuntimeCheckWithConvergent")

2799 << "cannot add control dependency to convergent operation";

2800 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "

2801 "would be needed with a convergent operation\n");

2802 return false;

2803 }

2804

2805 if (DepsAreSafe) {

2807 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"

2808 << (PtrRtChecking->Need ? "" : " don't")

2809 << " need runtime memory checks.\n");

2810 return true;

2811 }

2812

2813 emitUnsafeDependenceRemark();

2814 return false;

2815}

2816

2817void LoopAccessInfo::emitUnsafeDependenceRemark() {

2818 const auto *Deps = getDepChecker().getDependences();

2819 if (!Deps)

2820 return;

2821 const auto *Found =

2822 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {

2825 });

2826 if (Found == Deps->end())

2827 return;

2828 MemoryDepChecker::Dependence Dep = *Found;

2829

2830 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");

2831

2832

2833 bool HasForcedDistribution = false;

2834 std::optional<const MDOperand *> Value =

2837 const MDOperand *Op = *Value;

2840 }

2841

2842 const std::string Info =

2843 HasForcedDistribution

2844 ? "unsafe dependent memory operations in loop."

2845 : "unsafe dependent memory operations in loop. Use "

2846 "#pragma clang loop distribute(enable) to allow loop distribution "

2847 "to attempt to isolate the offending operations into a separate "

2848 "loop";

2849 OptimizationRemarkAnalysis &R =

2850 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;

2851

2852 switch (Dep.Type) {

2858 R << "\nBackward loop carried data dependence.";

2859 break;

2861 R << "\nForward loop carried data dependence that prevents "

2862 "store-to-load forwarding.";

2863 break;

2865 R << "\nBackward loop carried data dependence that prevents "

2866 "store-to-load forwarding.";

2867 break;

2869 R << "\nUnsafe indirect dependence.";

2870 break;

2872 R << "\nUnknown data dependence.";

2873 break;

2874 }

2875

2876 if (Instruction *I = Dep.getSource(getDepChecker())) {

2877 DebugLoc SourceLoc = I->getDebugLoc();

2879 SourceLoc = DD->getDebugLoc();

2880 if (SourceLoc)

2881 R << " Memory location is the same as accessed at "

2882 << ore::NV("Location", SourceLoc);

2883 }

2884}

2885

2887 const Loop *TheLoop,

2889 assert(TheLoop->contains(BB) && "Unknown block used");

2890

2891

2892 const BasicBlock *Latch = TheLoop->getLoopLatch();

2893 return !DT->dominates(BB, Latch);

2894}

2895

2898 assert(!Report && "Multiple reports generated");

2899

2902

2903 if (I) {

2904 CodeRegion = I->getParent();

2905

2906

2907 if (I->getDebugLoc())

2908 DL = I->getDebugLoc();

2909 }

2910

2911 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,

2912 DL, CodeRegion);

2913 return *Report;

2914}

2915

2917 auto *SE = PSE->getSE();

2918 if (TheLoop->isLoopInvariant(V))

2919 return true;

2921 return false;

2924}

2925
2926/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
2927/// Otherwise, return \p Ptr.
2928static Value *getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE,
2929                                       Loop *Lp) {
2930  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
2931  if (!GEP)
2932    return Ptr;

2933
2934  Value *V = Ptr;
2935  for (const Use &U : GEP->operands()) {
2936    if (!SE->isLoopInvariant(SE->getSCEV(U.get()), Lp)) {
2937      if (V == Ptr)
2938        V = U;
2939      else
2940        // A GEP with more than one loop-variant operand: give up.
2941        return Ptr;

2942 }

2943 }

2944 return V;

2945}

2946

2947

2948

2951 if (!PtrTy)

2952 return nullptr;

2953

2954

2955

2956

2957 Value *OrigPtr = Ptr;

2958

2961

2962 if (Ptr != OrigPtr)

2963

2965 V = C->getOperand();

2966

2968 return nullptr;

2969

2970

2971

2973 return nullptr;

2974

2975

2977 return V;

2978

2981 return V;

2982

2983 return nullptr;

2984}

2985

2986void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {

2988 if (!Ptr)

2989 return;

2990

2991

2992

2993

2994

2995

2996

2998 if (!StrideExpr)

2999 return;

3000

3002 return;

3003

3004 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "

3005 "versioning:");

3006 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");

3007

3009 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");

3010 return;

3011 }

3012

3013

3014

3015

3016

3017

3018

3019

3020

3021

3022

3023

3024

3025

3026 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();

3027

3028

3029

3030

3032 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());

3033 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());

3034 const SCEV *CastedStride = StrideExpr;

3035 const SCEV *CastedBECount = MaxBTC;

3036 ScalarEvolution *SE = PSE->getSE();

3037 if (BETypeSizeBits >= StrideTypeSizeBits)

3039 else

3041 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);

3042

3043

3044

3047 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "

3048 "Stride==1 predicate will imply that the loop executes "

3049 "at most once.\n");

3050 return;

3051 }

3052 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

3053

3054

3055

3056 const SCEV *StrideBase = StrideExpr;

3058 StrideBase = C->getOperand();

3060}
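// Numeric illustration (editorial sketch) of the guard above: versioning on
// the predicate Stride == 1 only helps when the loop can still run more than
// one iteration. With trip count MaxBTC + 1, a known Stride > MaxBTC means
// the predicated loop executes at most once, so versioning is skipped, e.g.
// Stride = 100 and MaxBTC = 63 gives Stride - MaxBTC = 37 > 0: skip.
#include <cstdint>

bool worthVersioningStride(int64_t Stride, int64_t MaxBTC) {
  // Mirrors the sign test on StrideMinusBETaken computed symbolically above.
  return Stride - MaxBTC <= 0;
}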

3061

3068 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {

3069 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();

3070 if (TTI && TTI->enableScalableVectorization())

3071

3072

3073 MaxTargetVectorWidthInBits =

3075

3076 DepChecker = std::make_unique<MemoryDepChecker>(

3077 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);

3078 PtrRtChecking =

3079 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);

3080 if (canAnalyzeLoop())

3081 CanVecMem = analyzeLoop(AA, LI, TLI, DT);

3082}

3083

3085 if (CanVecMem) {

3086 OS.indent(Depth) << "Memory dependences are safe";

3089 OS << " with a maximum safe vector width of "

3093 OS << ", with a maximum safe store-load forward width of " << SLDist

3094 << " bits";

3095 }

3096 if (PtrRtChecking->Need)

3097 OS << " with run-time checks";

3098 OS << "\n";

3099 }

3100

3101 if (HasConvergentOp)

3102 OS.indent(Depth) << "Has convergent operation in loop\n";

3103

3104 if (Report)

3105 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

3106

3107 if (auto *Dependences = DepChecker->getDependences()) {

3109 for (const auto &Dep : *Dependences) {

3110 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());

3111 OS << "\n";

3112 }

3113 } else

3114 OS.indent(Depth) << "Too many dependences, not recorded\n";

3115

3116

3117 PtrRtChecking->print(OS, Depth);

3118 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)

3119 OS.indent(Depth) << "Generated run-time checks are incomplete\n";

3120 OS << "\n";

3121

3123 << "Non vectorizable stores to invariant address were "

3124 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||

3125 HasLoadStoreDependenceInvolvingLoopInvariantAddress

3126 ? ""

3127 : "not ")

3128 << "found in loop.\n";

3129

3130 OS.indent(Depth) << "SCEV assumptions:\n";

3131 PSE->getPredicate().print(OS, Depth);

3132

3133 OS << "\n";

3134

3135 OS.indent(Depth) << "Expressions re-written:\n";

3136 PSE->print(OS, Depth);

3137}

3138

3140 bool AllowPartial) {

3141 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);

3142

3143

3144

3145 if (Inserted || It->second->hasAllowPartial() != AllowPartial)

3146 It->second = std::make_unique(&L, &SE, TTI, TLI, &AA, &DT,

3147 &LI, AC, AllowPartial);

3148

3149 return *It->second;

3150}

3152

3153

3154

3155

3156 for (const auto &[L, LAI] : LoopAccessInfoMap) {

3157 if (LAI->getRuntimePointerChecking()->getChecks().empty() &&

3158 LAI->getPSE().getPredicate().isAlwaysTrue())

3159 continue;

3160 LoopAccessInfoMap.erase(L);

3161 }

3162}

3163

3166 FunctionAnalysisManager::Invalidator &Inv) {

3167

3170

3171 return true;

3172

3173

3174

3175

3176 return Inv.invalidate<AAManager>(F, PA) ||

3180}

3181


static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))

We collect dependences up to this threshold.

static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))

Enable store-to-load forwarding conflict detection.

static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)

Definition LoopAccessAnalysis.cpp:1112

static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))

The maximum iterations used to merge memory checks.

static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)

Get the stride of a pointer access in a loop.

Definition LoopAccessAnalysis.cpp:2949

static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)

Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...

Definition LoopAccessAnalysis.cpp:213

static std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)

Try to compute a constant stride for AR.

Definition LoopAccessAnalysis.cpp:969

static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))

static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))

static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)

Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.

Definition LoopAccessAnalysis.cpp:753

static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))

static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))

static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)

Definition LoopAccessAnalysis.cpp:1073

static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt)

Check whether AR is a non-wrapping AddRec.

Definition LoopAccessAnalysis.cpp:1019

static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)

Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...

Definition LoopAccessAnalysis.cpp:1925

static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)

Check the dependence for two accesses with the same stride Stride.

Definition LoopAccessAnalysis.cpp:1980

static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)

Compare I and J and return the minimum.

Definition LoopAccessAnalysis.cpp:557

static const SCEV * mulSCEVOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)

Returns A * B, if it is guaranteed not to unsigned wrap.

Definition LoopAccessAnalysis.cpp:204

static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)

If Ptr is a GEP, which has a loop-variant operand, return that operand.

Definition LoopAccessAnalysis.cpp:2928

static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))

static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))

static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))

This enables versioning on the strides of symbolically striding memory accesses in code like the foll...

static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)

Returns A + B, if it is guaranteed not to unsigned wrap.

Definition LoopAccessAnalysis.cpp:195

This header provides classes for managing per-loop analyses.

This file provides utility analysis objects describing memory locations.

FunctionAnalysisManager FAM

This file defines the PointerIntPair class.

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

static SymbolRef::Type getType(const Symbol *Sym)

This pass exposes codegen information to IR-level passes.

static const X86InstrFMA3Group Groups[]

A manager for alias analyses.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

APInt abs() const

Get the absolute value.

LLVM_ABI APInt sextOrTrunc(unsigned width) const

Sign extend or truncate to width.

std::optional< int64_t > trySExtValue() const

Get sign extended value if possible.

int64_t getSExtValue() const

Get sign extended value.

This templated class represents "all analyses that operate over " (e....

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

A function analysis which provides an AssumptionCache.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

bool isNoBuiltin() const

Return true if the call should not be treated as a call to a builtin.

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

bool isConvergent() const

Determine if the invoke is convergent.

@ ICMP_UGE

unsigned greater or equal

@ ICMP_SGE

signed greater or equal

@ ICMP_ULE

unsigned less or equal

static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)

A parsed version of the target data layout string in and methods for querying it.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

iterator find(const_arg_type_t< KeyT > Val)

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

iterator_range< member_iterator > members(const ECValue &ECV) const

bool contains(const ElemTy &V) const

Returns true if V is contained an equivalence class.

const ECValue & insert(const ElemTy &Data)

insert - Insert a new value into the union/find set, ignoring the request if the value already exists...

member_iterator member_end() const

const ElemTy & getLeaderValue(const ElemTy &V) const

getLeaderValue - Return the leader for the specified value that is in the set.

member_iterator findLeader(const ElemTy &V) const

findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.

member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)

union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...

bool hasOptSize() const

Optimize this function for size (-Os) or minimum size (-Oz).

PointerType * getType() const

Global values are always pointers.

Class to represent integer types.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

An instruction for reading from memory.

Value * getPointerOperand()

static constexpr LocationSize beforeOrAfterPointer()

Any location before or after the base pointer (but still within the underlying object).

This analysis provides dependence information for the memory accesses of a loop.

LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)

Definition LoopAccessAnalysis.cpp:3182

LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)

Definition LoopAccessAnalysis.cpp:3164

LLVM_ABI void clear()

Definition LoopAccessAnalysis.cpp:3151

LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)

Definition LoopAccessAnalysis.cpp:3139

Drive the analysis of memory accesses in the loop.

const MemoryDepChecker & getDepChecker() const

the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...

LLVM_ABI bool isInvariant(Value *V) const

Returns true if value V is loop invariant.

Definition LoopAccessAnalysis.cpp:2916

LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const

Print the information about the memory accesses in the loop.

Definition LoopAccessAnalysis.cpp:3084

static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)

Return true if the block BB needs to be predicated in order for the loop to be vectorized.

Definition LoopAccessAnalysis.cpp:2886

LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)

Definition LoopAccessAnalysis.cpp:3062

Analysis pass that exposes the LoopInfo for a function.

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

bool isInnermost() const

Return true if the loop does not contain any (natural) loops.

unsigned getNumBackEdges() const

Calculate the number of back edges to the loop header.

BlockT * getHeader() const

LoopT * getParentLoop() const

Return the parent loop if it exists or nullptr for top level loops.

Represents a single loop in the control flow graph.

std::string getLocStr() const

Return a string containing the debug location of the loop (file name + line number if present,...

bool isAnnotatedParallel() const

Returns true if the loop is annotated parallel.

DebugLoc getStartLoc() const

Return the debug location of the start of this loop.

ArrayRef< MDOperand > operands() const

Checks memory dependences among accesses to the same underlying object to determine whether there vec...

ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const

Return the program order indices for the access location (Ptr, IsWrite).

bool isSafeForAnyStoreLoadForwardDistances() const

Return true if there are no store-load forwarding dependencies.

bool isSafeForAnyVectorWidth() const

Return true if the number of elements that are safe to operate on simultaneously is not bounded.

LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps)

Check whether the dependencies between the accesses are safe, and records the dependence information ...

Definition LoopAccessAnalysis.cpp:2364

EquivalenceClasses< MemAccessInfo > DepCandidates

Set of potential dependent memory accesses.

bool shouldRetryWithRuntimeChecks() const

In same cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...

const Loop * getInnermostLoop() const

uint64_t getMaxSafeVectorWidthInBits() const

Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...

bool isSafeForVectorization() const

No memory dependence was encountered that would inhibit vectorization.

SmallVector< MemAccessInfo, 8 > MemAccessInfoList

LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const

Find the set of instructions that read or write via Ptr.

Definition LoopAccessAnalysis.cpp:2438

VectorizationSafetyStatus

Type to keep track of the status of the dependence check.

@ PossiblySafeWithRtChecks

LLVM_ABI void addAccess(StoreInst *SI)

Register the location (instructions are given increasing numbers) of a write access.

Definition LoopAccessAnalysis.cpp:1780

PointerIntPair< Value *, 1, bool > MemAccessInfo

uint64_t getStoreLoadForwardSafeDistanceInBits() const

Return safe power-of-2 number of elements, which do not prevent store-load forwarding,...

Representation for a specific memory location.

static LLVM_ABI MemoryLocation get(const LoadInst *LI)

Return a location with information about the memory reference by the given instruction.

LocationSize Size

The maximum size of the location, in address-units, or UnknownSize if the size is not known.

AAMDNodes AATags

The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...

const Value * Ptr

The address of the start of the location.

PointerIntPair - This class implements a pair of a pointer and small integer.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

LLVM_ABI void addPredicate(const SCEVPredicate &Pred)

Adds a new predicate.

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)

Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.

LLVM_ABI void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)

Proves that V doesn't overflow by adding SCEV predicate.

LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)

Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.

LLVM_ABI const SCEV * getBackedgeTakenCount()

Get the (predicated) backedge count for the analyzed loop.

LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()

Get the (predicated) symbolic max backedge count for the analyzed loop.

LLVM_ABI const SCEV * getSCEV(Value *V)

Returns the SCEV expression of V, in the context of the current SCEV predicate.

A set of analyses that are preserved following a run of a transformation pass.

PreservedAnalysisChecker getChecker() const

Build a checker for this PreservedAnalyses and the specified analysis type.

Holds information about the memory runtime legality checks to verify that a group of pointers do not ...

bool Need

This flag indicates if we need to add the runtime check.

void reset()

Reset the state of the pointer runtime information.

unsigned getNumberOfChecks() const

Returns the number of run-time checks required according to needsChecking.

LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const

Print Checks.

Definition LoopAccessAnalysis.cpp:760

LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const

Decide if we need to add a check between two groups of pointers, according to needsChecking.

Definition LoopAccessAnalysis.cpp:546

LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const

Print the list run-time memory checks necessary.

Definition LoopAccessAnalysis.cpp:779

SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups

Holds a partitioning of pointers into "check groups".

LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)

Generate the checks and store it.

Definition LoopAccessAnalysis.cpp:539

friend struct RuntimeCheckingPtrGroup

static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)

Check if pointers are in the same partition.

Definition LoopAccessAnalysis.cpp:728

SmallVector< PointerInfo, 2 > Pointers

Information about the pointers that may require checking.

LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)

Insert a pointer and calculate the start and end SCEVs.

Definition LoopAccessAnalysis.cpp:400

This node represents a polynomial recurrence on the trip count of the specified loop.

const SCEV * getStart() const

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

const Loop * getLoop() const

This class represents a constant integer value.

ConstantInt * getValue() const

const APInt & getAPInt() const

NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const

This class represents an analyzed expression in the program.

LLVM_ABI bool isZero() const

Return true if the expression is a constant zero.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

Analysis pass that exposes the ScalarEvolution for a function.

static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)

Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...

The main scalar evolution driver.

const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)

When successful, this returns a SCEVConstant that is greater than or equal to (i.e.

LLVM_ABI bool isKnownNonNegative(const SCEV *S)

Test if the given expression is known to be non-negative.

LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

Return the SCEV object corresponding to -V.

LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const

LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)

LLVM_ABI bool isKnownNonPositive(const SCEV *S)

Test if the given expression is known to be non-positive.

LLVM_ABI bool isKnownNegative(const SCEV *S)

Test if the given expression is known to be negative.

LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)

LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)

Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?

LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)

LLVM_ABI const SCEV * getConstant(ConstantInt *V)

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)

Return a SCEV corresponding to a conversion of the input value to the specified type.

const SCEV * getOne(Type *Ty)

Return a SCEV for the constant 1 of a specific type.

LLVM_ABI const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

LLVM_ABI bool isKnownPositive(const SCEV *S)

Test if the given expression is known to be positive.

LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)

LLVM_ABI bool isSCEVable(Type *Ty) const

Test if values of the given type are analyzable within the SCEV framework.

LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const

Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...

LLVM_ABI const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)

APInt getSignedRangeMin(const SCEV *S)

Determine the min of the signed range for a particular SCEV.

LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)

Return an expression for the store size of StoreTy that is type IntTy.

LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)

Return a SCEV corresponding to a conversion of the input value to the specified type.

LLVM_ABI const SCEV * getCouldNotCompute()

LLVM_ABI const SCEV * getPointerBase(const SCEV *V)

Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...

LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)

Try to apply information from loop guards for L to Expr.

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)

Return an expression for a TypeSize.

LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)

Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)

Return a SCEV corresponding to a conversion of the input value to the specified type.

LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test if the given expression is known to satisfy the condition described by Pred, LHS,...

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

bool contains(const T &V) const

Check if the SmallSet contains the given element.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

Provides information about what library functions are available for the current target.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isVectorTy() const

True if this is an instance of VectorType.

bool isPointerTy() const

True if this is an instance of PointerType.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

A Use represents the edge between a Value definition and its users.

static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)

Retrieve all the VFInfo instances associated to the CallInst CI.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI bool canBeFreed() const

Return true if the memory object referred to by V can by freed in the scope for which the SSA value d...

LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const

Accumulate the constant offset this value has compared to a base pointer.

LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const

Returns the number of bytes known to be dereferenceable for the pointer value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

constexpr ScalarTy getFixedValue() const

An efficient, type-erasing, non-owning reference to a callable.

This class implements an extremely fast bulk output stream that can only output to a stream.

raw_ostream & indent(unsigned NumSpaces)

indent - Insert 'NumSpaces' spaces.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Abstract Attribute helper functions.

@ C

The default llvm calling convention, compatible with C.

bool match(Val *V, const Pattern &P)

bind_cst_ty m_scev_APInt(const APInt *&C)

Match an SCEV constant and bind it to an APInt.

is_undef_or_poison m_scev_UndefOrPoison()

Match an SCEVUnknown wrapping undef or poison.

class_match< const SCEVConstant > m_SCEVConstant()

specificloop_ty m_SpecificLoop(const Loop *L)

SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)

specificscev_ty m_scev_Specific(const SCEV *S)

Match if we have a specific specified SCEV.

class_match< const SCEV > m_SCEV()

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)

Check whether Metadata has a Value.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

LLVM_ABI bool willNotFreeBetween(const Instruction *Assume, const Instruction *CtxI)

Returns true, if no instruction between Assume and CtxI may free memory and the function is marked as...

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})

Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...

LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)

Return true if it is valid to use the assumptions provided by an assume intrinsic,...

LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

unsigned getPointerAddressSpace(const Type *T)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for loop.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

const Value * getPointerOperand(const Value *V)

A helper function that returns the pointer operand of a load, store or GEP instruction.

auto dyn_cast_or_null(const Y &Val)

OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)

Wrapper function around std::transform to apply a function to a range and store the result elsewhere.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)

Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)

Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...

Definition LoopAccessAnalysis.cpp:1656

LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)

Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...

Definition LoopAccessAnalysis.cpp:1725

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)

Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...

Definition LoopAccessAnalysis.cpp:155

LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)

Returns true if the memory operations A and B are consecutive.

Definition LoopAccessAnalysis.cpp:1766

DWARFExpression::Operation Op

LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

T bit_floor(T Value)

Returns the largest integral power of two no greater than Value if Value is nonzero.

LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)

This method is similar to getUnderlyingObject except that it can look through phi and select instruct...

LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)

Calculate Start and End points of memory access using exact backedge taken count BTC if computable or...

Definition LoopAccessAnalysis.cpp:318

LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

Definition LoopAccessAnalysis.cpp:1622

Implement std::hash so that hash_code can be used in STL containers.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

IR Values for the lower and upper bounds of a pointer evolution.

MDNode * Scope

The tag for alias scope specification (used with noalias).

MDNode * TBAA

The tag for type-based alias analysis.

MDNode * NoAlias

The tag specifying the noalias scope.

A special type used by analysis passes to provide an address that identifies that particular analysis...

Instruction * getDestination(const MemoryDepChecker &DepChecker) const

Return the destination instruction of the dependence.

DepType Type

The type of the dependence.

unsigned Destination

Index of the destination of the dependence in the InstMap vector.

LLVM_ABI bool isPossiblyBackward() const

May be a lexically backward dependence type (includes Unknown).

Definition LoopAccessAnalysis.cpp:1834

Instruction * getSource(const MemoryDepChecker &DepChecker) const

Return the source instruction of the dependence.

LLVM_ABI bool isForward() const

Lexically forward dependence.

Definition LoopAccessAnalysis.cpp:1838

LLVM_ABI bool isBackward() const

Lexically backward dependence.

Definition LoopAccessAnalysis.cpp:1817

LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const

Print the dependence.

Definition LoopAccessAnalysis.cpp:2460

unsigned Source

Index of the source of the dependence in the InstMap vector.

DepType

The type of the dependence.

@ BackwardVectorizableButPreventsForwarding

@ ForwardButPreventsForwarding

static LLVM_ABI const char * DepName[]

String version of the types.

static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)

Dependence types that don't prevent vectorization.

Definition LoopAccessAnalysis.cpp:1799

Represent one information held inside an operand bundle of an llvm.assume.

unsigned AddressSpace

Address space of the involved pointers.

LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)

Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.

Definition LoopAccessAnalysis.cpp:565

bool NeedsFreeze

Whether the pointer needs to be frozen after expansion, e.g.

LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)

Create a new pointer checking group containing a single pointer, with index Index in RtCheck.

Definition LoopAccessAnalysis.cpp:183

const SCEV * High

The SCEV expression which represents the upper bound of all the pointers in this group.

SmallVector< unsigned, 2 > Members

Indices of all the pointers that constitute this grouping.

const SCEV * Low

The SCEV expression which represents the lower bound of all the pointers in this group.

bool IsWritePtr

Holds the information if this pointer is used for writing to memory.

unsigned DependencySetId

Holds the id of the set of pointers that could be dependent because of a shared underlying object.

unsigned AliasSetId

Holds the id of the disjoint alias set to which this pointer belongs.

static LLVM_ABI const unsigned MaxVectorWidth

Maximum SIMD width.

static LLVM_ABI unsigned VectorizationFactor

VF as overridden by the user.

static LLVM_ABI unsigned RuntimeMemoryCheckThreshold

\When performing memory disambiguation checks at runtime do not make more than this number of compari...

static LLVM_ABI bool isInterleaveForced()

True if force-vector-interleave was specified by the user.

Definition LoopAccessAnalysis.cpp:151

static LLVM_ABI unsigned VectorizationInterleave

Interleave factor as overridden by the user.

static LLVM_ABI bool HoistRuntimeChecks

Function object to check whether the first component of a container supported by std::get (like std::...