LLVM: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Source File


#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <numeric>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

namespace {

// Equivalence class key, the initial tuple by which we group loads/stores.
// Loads/stores with different EqClassKeys are never merged.
//
// (We could in theory remove element-size from this tuple.  We'd just need to
// fix up the vector packing/unpacking code.)
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* Load/Store element size in bits */,
               char /* IsLoad; char b/c bool can't be a DenseMap key */
               >;
[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
            << " of element size " << ElementSize << " bits in addrspace "
            << AddrSpace;
}
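// Illustrative example (not from the original source): two i32 loads off the
// same underlying object in addrspace(0) get the key {%buf, 0, 32, 1} and may
// later be chained, while an i32 load and an i16 load off %buf land in
// different classes because their element sizes differ:
//
//   %buf = alloca [8 x i32]
//   %a = load i32, ptr %buf        ; key {%buf, 0, 32, IsLoad=1}
//   %p = getelementptr i32, ptr %buf, i64 1
//   %b = load i32, ptr %p          ; same key -> same equivalence class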

// A Chain is a list of ChainElems: instructions plus their byte offsets from
// the chain's "leader", the first instruction added to the chain.  All
// elements of a chain belong to the same equivalence class, so in particular
// they are either all loads or all stores.
//
// Chains are used in two canonical orderings:
//  - BB order, i.e. the order the instructions appear in the basic block, and
//  - offset order, i.e. sorted by OffsetFromLeader.
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;

void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}

void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}

[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}

using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough.
constexpr unsigned StackAdjustedAlignment = 4;

Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}

/// Reorders the instructions that I depends on (the instructions defining its
/// operands), to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move should follow I, so scan forward from I rather
  // than from the start of the block.
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}
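// Illustrative example (not from the original source): the vectorized load is
// inserted at the first load in BB order, but it addresses the lowest-offset
// element, whose address computation may appear later in the block; reorder()
// hoists that GEP (and its transitive operand instructions) above the new
// load:
//
//   %v1 = load i32, ptr %p1                  ; insert point (first load in BB)
//   %p0 = getelementptr i8, ptr %b, i64 -4   ; hoisted above the vector load
//   %v0 = load i32, ptr %p0                  ; lowest offset, supplies address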

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // We could erase instrs right after vectorizing them, but that can mess up
  // our BB iterators, and also can make the equivalence class keys point to
  // freed memory.  It's simpler to wait until we're done with the BB and
  // erase all at once.
  SmallVector<Instruction *, 128> ToErase;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;

  /// Runs the vectorizer on a "pseudo basic block": a range of instructions
  /// [Begin, End) within one BB, all of which are guaranteed to transfer
  /// execution to their successor.
  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);

  /// Runs the vectorizer on one equivalence class, i.e. one set of loads or
  /// stores with the same EqClassKey.
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);

  /// Runs the vectorizer on one chain, i.e. a subset of an equivalence class
  /// where every instruction accesses a known, constant offset from the
  /// chain's leader.
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains of instructions which read/write a
  /// contiguous block of memory.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains where it's safe to hoist loads up to the
  /// beginning of the sub-chain and safe to sink stores down to the end of the
  /// sub-chain.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that make legal, aligned accesses.
  /// Discards any length-1 subchains.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  /// Converts the instrs in the chain into a single vectorized load or store.
  /// Adds the old scalar loads/stores to ToErase.
  bool vectorizeChain(Chain &C);

  /// Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  /// This is nontrivial only if the chain's elements don't all have the same
  /// type.
  Type *getChainElemTy(const Chain &C);

  /// Determines whether ChainElem can be moved up (if IsLoad) or down (if
  /// !IsLoad) to ChainBegin -- i.e., ignoring the rest of the chain, is it
  /// safe to hoist/sink this one element?
  template <bool IsLoad>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  /// Merges equivalence classes whose underlying objects differ by one level
  /// of indirection (i.e., one is a getelementptr whose base is the other's
  /// underlying object).
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;

  /// Collects loads and stores grouped by "equivalence class", where:
  ///   - all elements in an eq class are a load or all are a store,
  ///   - they all load/store the same element size, and
  ///   - they all have the same value for getUnderlyingObject().
  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);

  /// Partitions Instrs into "chains" where every instruction has a known
  /// constant offset from the first instr in the chain.
  ///
  /// Postcondition: For all i, ret[i][0].second == 0, because the first instr
  /// in the chain is the leader, and an instr touches distance 0 from itself.
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);
};

class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and Store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
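// Usage sketch (illustrative, not part of the original file): the pass is
// registered as "load-store-vectorizer", so it can be exercised with
//   opt -passes=load-store-vectorizer in.ll -S
// or added to a new-pass-manager pipeline from C++ with
//   FPM.addPass(LoadStoreVectorizerPass());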

bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}

PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();

  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}

bool Vectorizer::run() {
  bool Changed = false;

  // Break up each BB at instructions which aren't guaranteed to transfer
  // execution to their successor: even if such an instruction (e.g. a call)
  // doesn't read or write memory, we can't hoist a later load above it,
  // because the call might never return.  Each resulting [barrier, barrier)
  // range is handled as a "pseudo-BB".
  for (BasicBlock *BB : post_order(&F)) {
    // BB must at least have a terminator.
    assert(!BB->empty());

    SmallVector<BasicBlock::iterator, 8> Barriers;
    Barriers.emplace_back(BB->begin());
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        Barriers.emplace_back(I.getIterator());
    Barriers.emplace_back(BB->end());

    for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
         ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    for (Instruction *I : ToErase) {
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
  }

  return Changed;
}

bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);

  return Changed;
}

bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}

bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize the chains.
  //
  // (Don't be scared by the depth of the loop nest here.  These operations
  // are all at worst O(n lg n) in the number of instructions, and splitting
  // chains doesn't change the number of instrs.  So the whole loop nest is
  // O(n lg n).)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}

std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // We know that elements in the chain with non-overlapping offsets can't
  // alias, but AA may not be smart enough to figure this out.  Use a
  // hashtable.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // The IR isn't modified while splitting, so one BatchAAResults can safely
  // cache alias queries across all isSafeToMove calls below.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain.  Stores get sunk
  // down to the last store in the chain.  Our algorithm for loads is:
  //
  //  - Take the first element of the chain.  This is the start of a new chain.
  //  - Take the next element of `C` and check for may-alias instructions
  //    up to the start of NewChain.  If no may-alias instrs, add it to
  //    NewChain.  Otherwise, start a new NewChain.
  //
  // For stores it's the same except in the reverse direction.
  //
  // We expect IsLoad to be an std::bool_constant.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }

        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(/*IsLoad=*/std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(/*IsLoad=*/std::bool_constant<false>());
}
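// Illustrative example (not from the original source): an intervening store
// that may alias the loads splits the chain at that point:
//
//   %v0 = load i32, ptr %p         ; subchain 1
//   store i32 0, ptr %q            ; may-alias barrier (per BatchAA)
//   %v1 = load i32, ptr %p1        ; subchain 2
//
// If AA proves %q is disjoint from the loads, or the store is itself a chain
// member at a non-overlapping offset, the two loads remain in one chain.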

std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
  APInt PrevReadEnd = C[0].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst));
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    // The instructions before It access bytes [leader, PrevReadEnd).
    auto &CurChain = Ret.back();
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));

    assert(8 * SzBytes % ChainElemTyBits == 0 &&
           "Every chain-element size must be a multiple of the element size after "
           "vectorization.");
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    // Add this instruction to the end of the current chain, or start a new
    // one.  An access is "contiguous" if it starts at or before PrevReadEnd
    // and any overlap is a whole multiple of the element size (so vector
    // packing can still represent it).
    bool AreContiguous = false;
    if (It->OffsetFromLeader.sle(PrevReadEnd)) {
      uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
      if (8 * Overlap % ChainElemTyBits == 0)
        AreContiguous = true;
    }

    LLVM_DEBUG(dbgs() << "LSV: Instruction is "
                      << (AreContiguous ? "contiguous" : "chain-breaker")
                      << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");

    if (AreContiguous)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
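// Illustrative example (not from the original source): with i32 elements,
// offsets {0, 4, 8, 20} split into the contiguous run {0, 4, 8} plus the
// singleton {20}, and the singleton is then dropped by the erase_if above.
// Exact duplicates such as offsets {0, 0} stay together, because their
// 4-byte overlap is a whole multiple of the 32-bit element size.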

Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules are:
  //  - If there are any pointer types in the chain, use an integer type.
  //  - Prefer an integer type if one appears in the chain.
  //  - For each element type in the chain, the chosen vector element type's
  //    size must divide evenly into that element type's size.
  //
  // The rest of the pass packs/unpacks the individual loads and stores into
  // vectors of this element type.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}
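// Illustrative example (not from the original source): a chain mixing
// `load float` and `load i32` (both 32 bits wide) picks i32 as the vector
// element type, and the float users get their values back via bitcasts;
// a chain of pointer loads is packed as iN, where N is the pointer size.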

std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // We use a simple greedy algorithm.
  //  - Given a chain of length N, find all prefixes that
  //    (a) are not longer than the max register length, and
  //    (b) are legal and fast according to TTI.
  //  - Starting from the longest prefix, try to create a vector instruction.
  //  - If one of them works, great.  Repeat the algorithm on any remaining
  //    elements in the chain.
  //  - If none of them work, discard the first element and repeat on a chain
  //    of length N-1.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;

    unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
    APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt ReadEnd = C[CEnd].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
      unsigned BytesAdded =
          PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
      Sz += BytesAdded;
      if (Sz > VecRegBytes)
        break;
      CandidateChains.emplace_back(CEnd, Sz);
      PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
    }

    // Consider the longest chain first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.  For
      // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
      // VecElemTy would be i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // Is a load/store with this alignment allowed by TTI and at least as
      // fast as an unvectorized load/store?
      //
      // TTI and F are members, and SizeBytes is loop-local, so bind them
      // explicitly in the capture list.
      auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
                               &F = F](Align Alignment) {
        if (Alignment.value() % SizeBytes == 0)
          return true;
        unsigned VectorizedSpeed = 0;
        bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
            F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
        if (!AllowsMisaligned) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " is misaligned, and therefore can't be vectorized.\n");
          return false;
        }

        unsigned ElementwiseSpeed = 0;
        (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                             Alignment, &ElementwiseSpeed);
        if (VectorizedSpeed < ElementwiseSpeed) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " has relative speed " << VectorizedSpeed
                     << ", which is lower than the elementwise speed of "
                     << ElementwiseSpeed
                     << ".  Therefore this access won't be vectorized.\n");
          return false;
        }
        return true;
      };

      // If we're loading/storing from an alloca, align it if possible.
      //
      // FIXME: We eagerly upgrade the alignment, regardless of whether TTI
      // tells us this is beneficial.  This feels a bit odd, but it matches
      // existing tests.  This isn't *so* bad, because at most we align to 4
      // bytes (current value of StackAdjustedAlignment).
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          IsAllowedAndFast(PrefAlign)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      if (!IsAllowedAndFast(Alignment)) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because its alignment is not AllowedAndFast: "
                   << Alignment.value() << "\n");
        continue;
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      CBegin = CEnd;
      break;
    }
  }
  return Ret;
}
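// Illustrative example (not from the original source): four contiguous i32
// loads (16 bytes) at alignment 4 survive only if TTI reports that a
// misaligned <4 x i32> access is both legal and no slower than four scalar
// loads; otherwise the greedy loop above retries the shorter prefixes
// [CBegin, CBegin+2] and [CBegin, CBegin+1] before giving up on CBegin.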

bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst));
  APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
  unsigned ChainBytes = BytesAdded;
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;
    // Only count bytes not already covered by an earlier (overlapping) access.
    BytesAdded =
        PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
    ChainBytes += BytesAdded;
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
  // than 1 byte (e.g. VecTy == <32 x i1>).
  unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
  Type *VecTy = FixedVectorType::get(VecElemTy, NumElem);

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier.  Get the new alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain.  We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // A load of a <1 x ...> vector is equivalent to a scalar load.
    if (NumElem == 1)
      VecTy = VecElemTy;

    // Chain is in offset order, so C[0] has the lowest offset, i.e. it's the
    // root of the vector.
    VecInst = Builder.CreateAlignedLoad(VecTy,
                                        getLoadStorePointerOperand(C[0].Inst),
                                        Alignment);

    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      Type *T = getLoadStoreType(I);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (T == VecTy) {
        V = VecInst;
      } else if (auto *VT = dyn_cast<FixedVectorType>(T)) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Finally, we need to reorder the instrs in the BB so that the (transitive)
    // operands of VecInst appear before it; the insertion point may precede
    // the definitions of the operands that the merged load now uses.
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](auto &A, auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    auto InsertElem = [&](Value *V, unsigned VecIdx) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)),
                     VecIdx++);
        }
      } else {
        InsertElem(I->getValueOperand(), VecIdx);
      }
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedStore(
        Vec,
        getLoadStorePointerOperand(C[0].Inst),
        Alignment);
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
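// Illustrative result (not from the original source): a chain of four i32
// loads at offsets 0, 4, 8, 12 is rewritten to
//
//   %vec = load <4 x i32>, ptr %p, align 16
//   %v0 = extractelement <4 x i32> %vec, i32 0
//   %v1 = extractelement <4 x i32> %vec, i32 1
//   ...
//
// after which the original scalar loads sit on ToErase until the pseudo-BB
// is finished.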

template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  // Walk from ChainElem toward ChainBegin: backwards (hoisting) for loads,
  // forwards (sinking) for stores.
  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return ChainElem->getIterator();
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return ChainBegin->getIterator();
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be sunk below invariant loads.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking what offset I accesses.  This may be better than AA is able to
    // do.
    //
    // We should really only have duplicate offsets for stores (the duplicate
    // loads should be CSE'ed), but in case we have a duplicate load, we'll
    // split the chain so we don't have to handle this case specially.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - I and ChainElem have the same offset, OR
      //   - I's offset is less than ChainElem's, but I touches past the
      //     beginning of ChainElem, OR
      //   - ChainElem's offset is less than I's, but ChainElem touches past
      //     the beginning of I.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          // Double check that AA also sees this alias.  If not, we probably
          // have a bug.
          ModRefInfo MR =
              BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
          assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }

      continue; // We're confident there's no alias.
    }

    LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "  " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "  " << *ChainElem << '\n'
                        << "  " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');

      return false;
    }
  }
  return true;
}

static bool checkNoWrapFlags(Instruction *I, bool Signed) {
  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
  return (Signed && BinOpI->hasNoSignedWrap()) ||
         (!Signed && BinOpI->hasNoUnsignedWrap());
}

static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");

  // If both AddOpA and AddOpB are adds with NSW/NUW and one of their operands
  // is the same, the transformation is safe if we can prove the other operands
  // differ by exactly IdxDiff without overflow.  For example:
  //
  //   %tmp7 = add nsw i32 %tmp2, %v0
  //   ...
  //   %tmp11 = add nsw i32 %v0, 1
  //   %tmp12 = add nsw i32 %tmp2, %tmp11
  //
  // Both adds share %tmp2; %tmp12's other operand is %v0 + 1, so
  // %tmp12 - %tmp7 == 1 and no intermediate step can wrap.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c)` and
    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}

std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");
  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW whose constant operand is at
  // least IdxDiff, the addition can't wrap.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check whether we have eligible add NSW/NUW instruction
  // sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    // In the checks below a matching operand in OpA and OpB is an operand
    // which is the same in those two instructions.  Below we account for
    // possible orders of the operands of these add instructions.
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt:
  //
  // Compute the known bits of ValA.  If every bit that could be set in
  // IdxDiff is already known to be zero in ValA (ignoring the sign bit for
  // the signed case), adding IdxDiff cannot overflow.
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits(ValA, Known, DL, &AC, ContextInst,
                     &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}

std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
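// Illustrative example (not from the original source): for two selects that
// share a condition,
//   %pA = select i1 %c, ptr %x, ptr %y
//   %pB = select i1 %c, ptr %x4, ptr %y4
// if both the true arms and the false arms differ by the same constant
// (say 4 bytes), then %pB - %pA == 4 regardless of %c.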

void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2) // There is nothing to merge.
    return;

  // The reduced key is the full key without the underlying object.  First
  // check that EqClassKey still has 4 elements.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

  // Form a map from the reduced key (without the underlying object) to the
  // underlying objects: one reduced key to many underlying objects, to form
  // groups of potentially merge-able equivalence classes.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });

  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate targets for a set of underlying objects.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // Look for 1-level indirections only.
      const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      auto Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };

  // For each item in RedKeyToUOMap, if it has more than one underlying object,
  // try to merge the equivalence classes.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
                       std::get<2>(RedKey)};

      // The entry for KeyFrom is guaranteed to exist; the entry for KeyTo is
      // default-constructed by operator[] if absent.  Merge in BB order.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}
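// Illustrative example (not from the original source): loads based on %base
// and loads based on %g = getelementptr i8, ptr %base, i64 8 initially form
// two classes with distinct underlying objects; since %g resolves to %base
// after one level of getUnderlyingObject, the classes are merged under the
// %base key, giving chain gathering a shot at the combined set.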

EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Selects are distinct instructions even if they share the same
      // condition and evaluate to consecutive pointers for the true and false
      // values of the condition.  Using the selects themselves for grouping
      // would put consecutive accesses into different lists, so they would
      // never even be checked for being consecutive.  Key on the condition
      // instead.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
        (SI && !TTI.isLegalToVectorizeStore(SI)))
      continue;

    Type *Ty = getLoadStoreType(&I);
    if (!VectorType::isValidElementType(Ty->getScalarType()))
      continue;

    // Skip weird element sizes, e.g. types whose size in bits isn't a
    // multiple of 8.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    // Skip vectors of pointers.  The vectorizeLoadChain/vectorizeStoreChain
    // functions are currently using an integer type for the vectorized
    // load/store, and do not support casting between the integer type and a
    // vector of pointers (e.g. i64 to <2 x i16*>).
    if (Ty->isPtrOrPtrVectorTy())
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}

std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif

  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but as currently implemented,
  // moving an element to the front of a MapVector is O(n).)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains.  This limits the O(n^2) behavior of this pass while
  // also allowing us to build arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        // `Offset` might not have the expected number of bits if e.g. AS has a
        // different number of bits than opaque pointers.
        ChainIter->second.emplace_back(I, Offset.value());

        // Move ChainIter to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
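// Design note (not from the original source): comparing each instruction
// against at most MaxChainsToTry most-recently-used chain leaders bounds the
// quadratic blowup on pathological inputs, while typical consecutive
// accesses still find their chain at or near the front of the MRU list.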

std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // If the pointer width is narrower than the accumulated-offset width, the
  // sextOrTrunc below must not discard significant bits.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute B - A.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width (the width of Dist) != pointer width (the width of
      // the Offset*s at this point).
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
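// Illustrative example (not from the original source): for
//   %a = getelementptr inbounds i8, ptr %p, i64 4
//   %b = getelementptr inbounds i8, ptr %p, i64 12
// stripAndAccumulateInBoundsConstantOffsets reduces both pointers to %p with
// OffsetA = 4 and OffsetB = 12, and getConstantOffset returns 8.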
