LLVM: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Source File

//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass merges loads/stores to/from sequential memory addresses into
// vector loads/stores.  Although there's nothing GPU-specific in here, this
// pass is motivated by the microarchitectural quirks of nVidia and AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <numeric>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

namespace {

// Equivalence class key: the tuple by which we group loads/stores.
// Loads/stores with different EqClassKeys are never merged.
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* Load/Store element size in bits */,
               char /* IsLoad; char because bool can't be a DenseMap key */
               >;
[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
            << " of element size " << ElementSize << " bits in addrspace "
            << AddrSpace;
}
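// A Chain is a set of loads or a set of stores that all access the same
// underlying object, expressed as instructions plus their constant byte
// offsets from the chain's "leader" (its first element).  For example, loads
// of p[0], p[4], and p[8] form one chain with OffsetFromLeader values 0, 4,
// and 8 relative to the load of p[0].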

struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;

void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}

void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}

void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}

using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough.
constexpr unsigned StackAdjustedAlignment = 4;

Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}

/// Reorders the instructions that I depends on (the instructions defining its
/// operands), to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move should follow I.  Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}
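// Why reorder() is needed: when a load is hoisted up to the position of the
// first load in its chain, instructions computing its address (e.g. a GEP
// defined between the two loads) may still sit below the hoist point.
// reorder() walks the operand tree and moves such definitions up so that all
// of the new vector instruction's operands dominate it.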

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // We could erase instrs right after vectorizing them, but that can mess up
  // our BB iterators, so instead we batch up the instructions to erase and
  // delete them once we're done walking the pseudo-BB.
  SmallVector<Instruction *, 128> ToErase;

  // Extra "gap-fill" / "extend" loads and stores created by this pass.  Any
  // that do not end up vectorized must be deleted again.
  DenseSet<Instruction *> ExtraElements;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;

  /// Runs the vectorizer on a "pseudo basic block", which is a range of
  /// instructions [Begin, End) within one BB, all of which are guaranteed to
  /// transfer execution to their successor.
  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);

  /// Runs the vectorizer on one equivalence class, i.e. one set of loads or
  /// one set of stores with the same underlying object, addrspace, and
  /// element size.
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);

  /// Runs the vectorizer on one chain, i.e. a subset of an equivalence class
  /// whose elements' pointers differ by known constant offsets.
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains of instructions which read/write a
  /// contiguous block of memory.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains where it's safe to hoist loads up to the
  /// beginning of the sub-chain and it's safe to sink stores down to the end
  /// of the sub-chain.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that can each be vectorized by a single
  /// vector instruction, considering alignment and the target's vector
  /// register width.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  /// Converts the chain to a single vector (or masked vector) load/store.
  bool vectorizeChain(Chain &C);

  /// Tries to compute the constant offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  /// This is nontrivial if the chain contains elements of different types.
  Type *getChainElemTy(const Chain &C);

  /// Determines whether ChainElem can be moved up (if IsLoadChain) or down
  /// (if !IsLoadChain) to ChainBegin -- i.e. in the range
  /// [ChainBegin, ChainElem] -- without changing the program's semantics.
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  /// Merges equivalence classes whose underlying objects only differ by a
  /// level of indirection (e.g. pointer casts), so that more instructions
  /// become candidates for vectorizing together.
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;

  /// Collects loads and stores grouped by "equivalence class", where
  /// instructions in the same equivalence class are candidates for
  /// vectorizing together.
  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);

  /// Partitions the instructions of an equivalence class into chains, where
  /// the pointers of every pair of instructions in a chain differ by a known
  /// constant offset.
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);

  /// Checks whether an access of SizeBytes at the given alignment is allowed
  /// by TTI and at least as fast as the corresponding element-wise accesses.
  bool accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS, Align Alignment,
                              unsigned VecElemBits) const;

  /// Creates an extra load or store of type Ty, Offset bytes after PrevElem,
  /// to fill a gap in a chain or to extend it to a legal masked width.  The
  /// new instruction is recorded in ExtraElements.
  ChainElem createExtraElementAfter(const ChainElem &PrevElem, Type *Ty,
                                    APInt Offset, StringRef Prefix,
                                    Align Alignment = Align());

  /// Builds the constant mask for a masked load/store of VecTy: lanes backed
  /// by real chain elements are true; lanes for extra (gap-fill/extend)
  /// elements are false.  (The original identifier was lost in extraction;
  /// createMaskForChain is a reconstructed name, used consistently below.)
  Value *createMaskForChain(const Chain &C, FixedVectorType *VecTy);

  /// Deletes any extra elements that were created but never vectorized.
  void deleteExtraElements();
};
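// Rough shape of the pass: collectEquivalenceClasses() buckets the loads and
// stores, gatherChains() links accesses at constant offsets, and each chain
// is then narrowed by splitChainByMayAliasInstrs(), splitChainByContiguity(),
// and splitChainByAlignment() before vectorizeChain() emits the final vector
// (or masked vector) instruction.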

class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and Store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}

bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
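// The pass is registered as "load-store-vectorizer" (see DEBUG_TYPE above),
// so it can be exercised in isolation with, e.g.:
//   opt -passes=load-store-vectorizer -S input.ll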

PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();

  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}

bool Vectorizer::run() {
  bool Changed = false;

  // Break up the BB if there are any instrs which aren't guaranteed to
  // transfer execution to their successor.
  //
  // Consider, for example:
  //
  //   def assert_arr_len(int n) { if (n < 2) exit(); }
  //
  //   load arr[0]
  //   call assert_arr_len(arr.length)
  //   load arr[1]
  //
  // Even though assert_arr_len does not read or write any memory, we can't
  // speculate the second load before the call, because doing so might cause a
  // segfault that the call would have prevented.  So we split the BB into
  // "pseudo-BBs" at each such barrier instruction and vectorize each
  // pseudo-BB separately.
  for (BasicBlock *BB : post_order(&F)) {
    // BB must at least contain a terminator.
    assert(!BB->empty());

    SmallVector<BasicBlock::iterator, 8> Barriers;
    Barriers.emplace_back(BB->begin());
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        Barriers.emplace_back(I.getIterator());
    Barriers.emplace_back(BB->end());

    for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
         ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    // Erase the vectorized instructions, and where possible their now-dead
    // address computations.
    for (Instruction *I : ToErase) {
      // Extra (gap-fill/extend) elements are handled by deleteExtraElements.
      if (ExtraElements.contains(I))
        continue;
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
    deleteExtraElements();
  }

  return Changed;
}

bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);

  return Changed;
}

bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}

bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize the chains.
  //
  // (Don't be scared by the depth of the loop nest here.  These operations
  // are all at worst O(n lg n) in the number of instructions, and splitting
  // chains doesn't change the number of instrs.  So the whole loop nest is
  // O(n lg n).)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}

std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // We know that elements in the chain with nonoverlapping offsets can't
  // alias, but AA may not be smart enough to figure this out.  Use a
  // hashtable.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // Across a single invocation of this function the IR is not changing, so
  // using a batched Alias Analysis is safe and can reduce compile time.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain.  Stores get sunk
  // down to the last store in the chain.  Our algorithm for loads is:
  //
  //  - Take the first element of the chain.  This is the start of a new
  //    chain.
  //
  //  - Take the next element of `Chain` and check for may-alias instructions
  //    up to the start of the current chain.  If none exist, add it to the
  //    current chain.  Otherwise, start a new chain.
  //
  // The algorithm for stores is the same, but in reverse.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }

        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(/*IsLoad=*/std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(/*IsLoad=*/std::bool_constant<false>());
}

std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  // If the target supports masked loads/stores, small gaps in an
  // otherwise-contiguous chain can be filled with extra elements, and the
  // whole run vectorized later as one masked access.  Work out up front
  // whether gap filling is worth attempting for this chain.
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  Type *ElementType = getLoadStoreType(C[0].Inst)->getScalarType();
  unsigned MaxVecRegBits = TTI.getLoadStoreVecRegBitWidth(AS);
  Align OptimisticAlign = Align(MaxVecRegBits / 8);
  unsigned int MaxVectorNumElems =
      MaxVecRegBits / DL.getTypeSizeInBits(ElementType);

  // Optimistically check masked-access legality against the widest vector we
  // might form; each candidate is re-checked in splitChainByAlignment.  (The
  // exact legality expression was lost in extraction; querying
  // isLegalMaskedLoad/isLegalMaskedStore is the assumed behavior.)
  FixedVectorType *OptimisticVectorType =
      FixedVectorType::get(ElementType, MaxVectorNumElems);
  bool TryFillGaps =
      isa<LoadInst>(C[0].Inst)
          ? TTI.isLegalMaskedLoad(OptimisticVectorType, OptimisticAlign, AS)
          : TTI.isLegalMaskedStore(OptimisticVectorType, OptimisticAlign, AS);

  // Extra elements need a plausible alignment.  Track the best-aligned
  // element of the chain so the alignment of a new element at any offset can
  // be derived from it.
  Align BestAlignedElemAlign = getLoadStoreAlignment(C[0].Inst);
  APInt OffsetOfBestAlignedElemFromLeader = C[0].OffsetFromLeader;
  for (const auto &E : C) {
    Align ElementAlignment = getLoadStoreAlignment(E.Inst);
    if (ElementAlignment > BestAlignedElemAlign) {
      BestAlignedElemAlign = ElementAlignment;
      OffsetOfBestAlignedElemFromLeader = E.OffsetFromLeader;
    }
  }

  auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
    return commonAlignment(
        BestAlignedElemAlign,
        (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
            .abs()
            .getLimitedValue());
  };

  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
  ChainElem &Prev = C[0];
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    auto &CurChain = Ret.back();

    APInt PrevSzBytes =
        APInt(ASPtrBits, DL.getTypeStoreSize(getLoadStoreType(Prev.Inst)));
    APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));

    assert(8 * SzBytes % ChainElemTyBits == 0 &&
           "Every chain-element size must be a multiple of the element size after "
           "vectorization.");
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    bool AreContiguous = false;
    if (It->OffsetFromLeader.sle(PrevReadEnd)) {
      // The elements abut or overlap; they only count as contiguous if the
      // overlap is a multiple of the eventual vector element size.
      uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
      if (8 * Overlap % ChainElemTyBits == 0)
        AreContiguous = true;
    }

    LLVM_DEBUG(dbgs() << "LSV: Elements are "
                      << (AreContiguous ? "contiguous" : "chain-breaker")
                      << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");

    // If the elements are not contiguous, try to fill the gap with extra
    // loads/stores.  Gap filling is restricted to elements of equal size,
    // which keeps the lane arithmetic simple:
    //  - a gap of exactly one element, or
    //  - a gap of exactly two elements, when that completes a group of four
    //    elements in the current chain.
    bool GapFilled = false;
    if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
      APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
      if (GapSzBytes == PrevSzBytes) {
        // One-element gap: create one extra element right after Prev.
        ChainElem NewElem = createExtraElementAfter(
            Prev, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        CurChain.push_back(NewElem);
        GapFilled = true;
      }

      // Two-element gap: only fill it when this would complete a group of
      // four elements in the current chain.
      if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
        ChainElem NewElem1 = createExtraElementAfter(
            Prev, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        ChainElem NewElem2 = createExtraElementAfter(
            NewElem1, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
        CurChain.push_back(NewElem1);
        CurChain.push_back(NewElem2);
        GapFilled = true;
      }
    }

    if (AreContiguous || GapFilled)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});

    // The element whose read reaches furthest becomes the new Prev, so that
    // overlapping elements are handled correctly.
    if (ReadEnd.sge(PrevReadEnd))
      Prev = *It;
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
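// Worked example: i32 loads at offsets 0, 4, and 12 leave a 4-byte hole at
// offset 8.  If masked loads are legal, an extra "GapFill" element is created
// at offset 8 and the run becomes one 4-element candidate chain; when the
// chain is vectorized as a masked access, the extra lane's mask bit stays
// false, so the hole is never really accessed.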

Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules:
  //  - If there are any pointer types in the chain, use an integer type.
  //  - Prefer an integer type if it appears in the chain.
  //  - The width of the chosen type is the width of any one element's scalar
  //    type (they all have the same bit width; this is checked in
  //    collectEquivalenceClasses).
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}
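// Example: a chain mixing f32 and i32 elements yields i32 (integer types are
// preferred), while a chain containing any pointer element yields an iN of
// the element's bit width, since the packing code does not build vectors of
// pointers.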

std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // We use a simple greedy algorithm.
  //  - Given a chain of length N, find all candidate prefixes that fit into
  //    the target's largest vector register.
  //  - Starting from the longest candidate, try to create a vector
  //    instruction for it.
  //  - If one works, great; repeat the algorithm on any remaining elements.
  //    Otherwise, retry with the next-shorter candidate.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  // A chain that went through gap filling may contain extra loads/stores.
  // Such a chain can only be vectorized as a masked access, which not every
  // candidate subchain is allowed to use, so remember whether any extras are
  // present.
  bool CandidateChainsMayContainExtraLoadsStores = any_of(
      C, [this](const ChainElem &E) { return ExtraElements.contains(E.Inst); });

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;

    // Elements may overlap, so count each covered byte only once.
    unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
    APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt ReadEnd = C[CEnd].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
      unsigned BytesAdded =
          PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
      Sz += BytesAdded;
      if (Sz > VecRegBytes)
        break;
      CandidateChains.emplace_back(CEnd, Sz);
      PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
    }

    // Consider the longest chain first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.
      // For example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this
      // case VecElemTy is i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // If this is an access to an alloca, we might be able to upgrade the
      // alloca's alignment to StackAdjustedAlignment, if that makes the
      // vectorized access allowed and fast.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      Chain ExtendingLoadsStores;
      if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
        // The access is misaligned or slow at this size.  Before giving up,
        // try to extend the chain to the next power-of-2 element count and
        // vectorize it as a masked access whose added lanes are masked off.
        bool AllowedAndFast = false;
        if (NumVecElems < TargetVF && isPowerOf2_32(NumVecElems) &&
            VecElemBits >= 8) {
          // Extending is only done for byte-sized elements.
          assert(VecElemBits % 8 == 0);
          unsigned VecElemBytes = VecElemBits / 8;
          unsigned NewNumVecElems = PowerOf2Ceil(NumVecElems);
          unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;

          assert(isPowerOf2_32(TargetVF) &&
                 "TargetVF expected to be a power of 2");
          assert(NewNumVecElems <= TargetVF &&
                 "Should not extend past TargetVF");

          LLVM_DEBUG(dbgs()
                     << "LSV: attempting to extend chain of " << NumVecElems
                     << " " << (IsLoadChain ? "loads" : "stores") << " to "
                     << NewNumVecElems << " elements\n");
          // Assumed legality check (the exact expression was lost in
          // extraction): a masked load/store of the extended vector type
          // must be legal for the target.
          bool IsLegalToExtend =
              IsLoadChain ? TTI.isLegalMaskedLoad(
                                FixedVectorType::get(VecElemTy, NewNumVecElems),
                                Alignment, AS)
                          : TTI.isLegalMaskedStore(
                                FixedVectorType::get(VecElemTy, NewNumVecElems),
                                Alignment, AS);

          // Extending may turn a forbidden or slow access into an allowed,
          // fast masked access of the larger size.
          if (IsLegalToExtend &&
              accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
                                     VecElemBits)) {
            LLVM_DEBUG(dbgs()
                       << "LSV: extending " << (IsLoadChain ? "load" : "store")
                       << " chain of " << NumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << SizeBytes << " to "
                       << NewNumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << NewSizeBytes
                       << ", TargetVF=" << TargetVF << " \n");

            // Create the extra elements just past the current candidate's
            // end; their lanes will be masked off.
            unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
            for (unsigned I = 0; I < (NewNumVecElems - NumVecElems); I++) {
              ChainElem NewElem = createExtraElementAfter(
                  C[CBegin], VecElemTy,
                  APInt(ASPtrBits, SizeBytes + I * VecElemBytes), "Extend");
              ExtendingLoadsStores.push_back(NewElem);
            }

            // Update this candidate chain's parameters to the extended size.
            SizeBytes = NewSizeBytes;
            NumVecElems = NewNumVecElems;
            AllowedAndFast = true;
          }
        }
        if (!AllowedAndFast) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because its alignment is not AllowedAndFast: "
              << Alignment.value() << "\n");
          continue;
        }
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }

      if (CandidateChainsMayContainExtraLoadsStores) {
        // The candidate [CBegin, CEnd] may have inherited extra loads/stores
        // from gap filling.  Those force a masked access; if the target
        // cannot do a masked load/store of this exact vector type, the
        // candidate must be dropped.  (The slice expression below is a
        // reconstruction of the original check over [CBegin, CEnd].)
        bool CurrCandContainsExtraLoadsStores = llvm::any_of(
            ArrayRef<ChainElem>(C).slice(CBegin, CEnd - CBegin + 1),
            [this](const ChainElem &E) {
              return ExtraElements.contains(E.Inst);
            });

        if (CurrCandContainsExtraLoadsStores &&
            !(IsLoadChain
                  ? TTI.isLegalMaskedLoad(
                        FixedVectorType::get(VecElemTy, NumVecElems),
                        Alignment, AS)
                  : TTI.isLegalMaskedStore(
                        FixedVectorType::get(VecElemTy, NumVecElems),
                        Alignment, AS))) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because it contains extra loads/stores that we cannot "
                 "legally vectorize into a masked load/store \n");
          continue;
        }
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      for (ChainElem E : ExtendingLoadsStores)
        NewChain.emplace_back(E);
      CBegin = CEnd;
      break;
    }
  }
  return Ret;
}
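// Example of the greedy search: for eight i8 stores at offsets 0..7 with only
// 4-byte alignment, the full 8-byte candidate may be rejected as slow; the
// loop then falls back to shorter candidates such as [0, 3], vectorizes it as
// a <4 x i8> store, and restarts the search at offset 4.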

bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  bool ChainContainsExtraLoadsStores = llvm::any_of(
      C, [this](const ChainElem &E) { return ExtraElements.contains(E.Inst); });

  // A chain of two elements where one is an extra load/store is really just
  // a single real access; there is nothing to gain from vectorizing it.
  if (C.size() == 2 && ChainContainsExtraLoadsStores)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  // Account for overlapping elements: count each covered byte only once.
  unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst));
  APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
  unsigned ChainBytes = BytesAdded;
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    BytesAdded =
        PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
    ChainBytes += BytesAdded;
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>).
  unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
  Type *VecTy = FixedVectorType::get(VecElemTy, NumElem);

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the
  // alloca's alignment earlier in splitChainByAlignment; re-query it.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain.  We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // A chain containing extra (gap-fill/extend) elements becomes a masked
    // load, so the extra lanes are never actually read.  (createMaskForChain
    // is a reconstructed name; see its definition below.)
    if (ChainContainsExtraLoadsStores) {
      Value *Mask = createMaskForChain(C, cast<FixedVectorType>(VecTy));
      VecInst = Builder.CreateMaskedLoad(
          VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment, Mask);
    } else {
      // Degenerate case: emit a scalar load rather than a 1-element vector.
      if (NumElem == 1)
        VecTy = VecElemTy;

      VecInst = Builder.CreateAlignedLoad(
          VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment);
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      // Extra loads have no uses to replace; skip them.
      if (ExtraElements.contains(I))
        continue;
      Value *V;
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (NumElem == 1) {
        V = VecInst;
      } else if (auto *VT = dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Finally, we need to reorder the instrs in the BB so that the
    // (transitive) operands of VecInst appear before it.  Suppose we have
    // vectorized the following code:
    //
    //   ptr1  = gep a, 1
    //   load1 = load i32 ptr1
    //   ptr0  = gep a, 0
    //   load0 = load i32 ptr0
    //
    // We will put the vectorized load at the location of the earliest load in
    // the BB, i.e. load1.  We get:
    //
    //   ptr1  = gep a, 1
    //   loadv = load <2 x i32> ptr0
    //   load0 = extractelement loadv, 0
    //   load1 = extractelement loadv, 1
    //   ptr0  = gep a, 0
    //
    // Notice that loadv uses ptr0, which is defined *after* it!
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](auto &A, auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    auto InsertElem = [&](Value *V, unsigned VecIdx) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      // Extra stores contribute no real data; their lanes stay poison and
      // are masked off below.
      if (ExtraElements.contains(I))
        continue;
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)),
                     VecIdx++);
        }
      } else {
        InsertElem(I->getValueOperand(), VecIdx);
      }
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    if (ChainContainsExtraLoadsStores) {
      // (createMaskForChain is a reconstructed name; see its definition
      // below.)
      Value *Mask = createMaskForChain(C, cast<FixedVectorType>(VecTy));
      VecInst = Builder.CreateMaskedStore(
          Vec, getLoadStorePointerOperand(C[0].Inst), Alignment, Mask);
    } else {
      VecInst = Builder.CreateAlignedStore(
          Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
    }
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
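// For example, four contiguous i32 loads are rewritten roughly as:
//   %vec = load <4 x i32>, ptr %p, align 16
//   %v0  = extractelement <4 x i32> %vec, i32 0
//   ...
//   %v3  = extractelement <4 x i32> %vec, i32 3
// and a chain with a filled gap becomes an llvm.masked.load whose mask is
// false in the gap lane.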

template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return ChainElem->getIterator();
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return ChainBegin->getIterator();
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be sunk below invariant loads.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking what offset it accesses.  This may be better than AA is able
    // to do.
    //
    // We should really only have duplicate offsets for stores (the duplicate
    // loads should be CSE'ed), but in case we have a duplicate load, we'll
    // split the chain so we don't have to handle this case specially.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - I and ChainElem have the same offset, OR
      //   - I's offset is less than ChainElem's, but I touches past the
      //     beginning of ChainElem, OR
      //   - ChainElem's offset is less than I's, but ChainElem touches past
      //     the beginning of I.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          // Double check that AA also sees this alias.  If not, we probably
          // have a bug.
          ModRefInfo MR =
              BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
          assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }

      continue; // We're confident there's no alias.
    }

    LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in BB:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n'
                        << "    " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');

      return false;
    }
  }
  return true;
}

static bool checkNoWrapFlags(Instruction *I, bool Signed) {
  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
  return (Signed && BinOpI->hasNoSignedWrap()) ||
         (!Signed && BinOpI->hasNoUnsignedWrap());
}

static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");

  // If both OpA and OpB are adds with NSW/NUW and with one of the operands
  // being the same, we can guarantee that the transformation is safe if we
  // can prove that OpA won't overflow when IdxDiff is added to the other
  // operand of OpA.  For example:
  //
  //  %tmp7  = add nsw i32 %tmp2, %v0
  //  %tmp8  = sext i32 %tmp7 to i64
  //  ...
  //  %tmp11 = add nsw i32 %v0, 1
  //  %tmp12 = add nsw i32 %tmp2, %tmp11
  //  %tmp13 = sext i32 %tmp12 to i64
  //
  // Both %tmp7 and %tmp12 have the nsw flag and the first operand is %tmp2.
  // It's guaranteed that adding 1 to %tmp7 won't overflow because %tmp11 adds
  // 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c0)` and `x +nsw/nuw (y +nsw/nuw c1)`
    // with c1 - c0 == IdxDiff.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}

std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");
  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added
  // to ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check if we have eligible add NSW/NUW instruction
  // sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    // In the checks below a matching operand in OpA and OpB is an operand
    // which is the same in those two instructions.  Below we account for
    // possible orders of the operands of these add instructions.
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt:
  //
  // Assuming IdxDiff is positive: if all set bits of IdxDiff or any higher
  // order bit other than the sign bit are known to be zero in ValA, we can
  // add Diff to it while guaranteeing no overflow of any sort.
  //
  // If IdxDiff is negative, do the same, but swap ValA and ValB.
  if (!Safe) {
    // When computing known bits, use the GEPs as context instructions, since
    // they likely are in the same BB as the load/store.
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, AC, ContextInst,
                     &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
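// Example: with %i.next = add nsw i64 %i, 1, the pointers
//   gep inbounds i32, ptr %p, i64 %i   and   gep inbounds i32, ptr %p, i64 %i.next
// differ by IdxDiff = 1 at Stride = 4 bytes, so this function returns 4.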

std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}

void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2)
    return;

  // For each class, determine if all the underlying objects are the same
  // modulo one level of indirection; if so, the classes can be merged.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

  // Form a map from the reduced key (without the underlying object) to all
  // underlying objects seen with that key; merging is only worth attempting
  // when a reduced key maps to more than one underlying object.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });

  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate targets for a set of underlying objects.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // Look for 1-level indirections only.
      const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      auto Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };

  // For each item in RedKeyToUOMap, if it has more than one underlying
  // object, try to merge the equivalence classes.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
                       std::get<2>(RedKey)};

      // The entry for KeyFrom is guaranteed to exist, unlike KeyTo.  Thus,
      // request the reference to the instructions vector for KeyTo first.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}

EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Two selects are distinct instructions even if they share the same
      // condition and evaluate to consecutive pointers for the true and false
      // values of the condition.  Grouping instructions by the select itself
      // would put consecutive accesses into different lists, and they would
      // never even be checked for being consecutive, nor vectorized.  Key on
      // the condition instead.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
        (SI && !TTI.isLegalToVectorizeStore(SI)))
      continue;

    Type *Ty = getLoadStoreType(&I);
    if (!VectorType::isValidElementType(Ty->getScalarType()))
      continue;

    // Skip weird non-byte sizes.  They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    // Skip vectors of pointers.  The vectorizeLoadChain/vectorizeStoreChain
    // functions are currently using an integer type for the vectorized
    // load/store, and do not support casting between the integer type and a
    // vector of pointers (e.g. i64 to <2 x i16*>).
    if (Ty->isPtrOrPtrVectorTy())
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}

std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif

  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but as currently implemented,
  // moving an element to the front of a MapVector is O(n).)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains.  This limits the O(n^2) behavior of this pass while
  // also allowing us to build arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        // `Offset` might not have the expected number of bits if, e.g., AS
        // has a different number of bits than opaque pointers.
        ChainIter->second.emplace_back(I, Offset.value());
        // Move ChainIter to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}

std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // If we have to shrink the pointer, the accumulated offsets must fit into
  // the smaller width; otherwise we cannot reason about them.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute B - A.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width (the width of Dist) != pointer width (the width
      // of the Offset*s at this point).
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}

bool Vectorizer::accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS,
                                        Align Alignment,
                                        unsigned VecElemBits) const {
  // An access whose size divides its alignment is trivially fine.
  if (Alignment.value() % SizeBytes == 0)
    return true;

  // Otherwise ask TTI whether the misaligned access is allowed, and how fast
  // it is relative to the element-wise accesses.
  unsigned VectorizedSpeed = 0;
  bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
      F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
  if (!AllowsMisaligned) {
    LLVM_DEBUG(
        dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace " << AS
               << " with alignment " << Alignment.value()
               << " is misaligned, and therefore can't be vectorized.\n");
    return false;
  }

  unsigned ElementwiseSpeed = 0;
  (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                       Alignment, &ElementwiseSpeed);
  if (VectorizedSpeed < ElementwiseSpeed) {
    LLVM_DEBUG(dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
                      << AS << " with alignment " << Alignment.value()
                      << " has relative speed " << VectorizedSpeed
                      << ", which is lower than the elementwise speed of "
                      << ElementwiseSpeed
                      << ". Therefore this access won't be vectorized.\n");
    return false;
  }
  return true;
}
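// Example: a 16-byte access at align 16 passes the first check outright,
// while a 16-byte access at align 4 is vectorized only if
// allowsMisalignedMemoryAccesses() permits it and reports it at least as fast
// as the corresponding element-wise accesses.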

ChainElem Vectorizer::createExtraElementAfter(const ChainElem &Prev, Type *Ty,
                                              APInt Offset, StringRef Prefix,
                                              Align Alignment) {
  Instruction *NewElement = nullptr;
  // Insert right after the previous element, Offset bytes past its pointer.
  // (The exact insertion point used upstream was lost in extraction;
  // inserting after Prev.Inst is the assumed behavior.)
  if (auto *PrevLoad = dyn_cast<LoadInst>(Prev.Inst)) {
    Builder.SetInsertPoint(PrevLoad->getNextNode());
    Value *NewGep = Builder.CreatePtrAdd(
        PrevLoad->getPointerOperand(), Builder.getInt(Offset), Prefix + "GEP");
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    NewElement = Builder.CreateAlignedLoad(Ty, NewGep, Alignment, Prefix);
  } else {
    auto *PrevStore = cast<StoreInst>(Prev.Inst);
    Builder.SetInsertPoint(PrevStore->getNextNode());
    Value *NewGep = Builder.CreatePtrAdd(
        PrevStore->getPointerOperand(), Builder.getInt(Offset), Prefix + "GEP");
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    NewElement =
        Builder.CreateAlignedStore(PoisonValue::get(Ty), NewGep, Alignment);
  }

  // Propagate the metadata of the previous element and remember the new
  // element, so it can be deleted again if it is never vectorized.
  NewElement->copyMetadata(*Prev.Inst);
  ExtraElements.insert(NewElement);

  APInt NewOffsetFromLeader = Prev.OffsetFromLeader + Offset;
  LLVM_DEBUG(dbgs() << "LSV: Extra Element Created: \n"
                    << *NewElement
                    << " OffsetFromLeader: " << NewOffsetFromLeader << "\n");
  return ChainElem{NewElement, NewOffsetFromLeader};
}

// Builds the constant mask for a masked load/store covering the chain: lanes
// that correspond to real chain elements are true; lanes created for
// gap-fill/extend elements remain false.  (The function's original name was
// lost in extraction; createMaskForChain is a reconstructed name, used
// consistently with the declaration above.)
Value *Vectorizer::createMaskForChain(const Chain &C, FixedVectorType *VecTy) {
  SmallVector<Constant *, 8> MaskElts(VecTy->getNumElements(),
                                      Builder.getInt1(false));

  // Mark the lanes of every non-extra element (scalar or short vector).
  for (const ChainElem &E : C) {
    if (ExtraElements.contains(E.Inst))
      continue;
    unsigned EOffset =
        (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
    unsigned VecIdx =
        8 * EOffset / DL.getTypeSizeInBits(VecTy->getScalarType());
    if (FixedVectorType *VT =
            dyn_cast<FixedVectorType>(getLoadStoreType(E.Inst)))
      for (unsigned J = 0; J < VT->getNumElements(); ++J)
        MaskElts[VecIdx + J] = Builder.getInt1(true);
    else
      MaskElts[VecIdx] = Builder.getInt1(true);
  }
  return ConstantVector::get(MaskElts);
}

void Vectorizer::deleteExtraElements() {
  for (auto *ExtraElement : ExtraElements) {
    if (isa<LoadInst>(ExtraElement)) {
      [[maybe_unused]] bool Deleted =
          RecursivelyDeleteTriviallyDeadInstructions(ExtraElement);
      assert(Deleted && "Extra Load should always be trivially dead");
    } else {
      // A store is never trivially dead, so an extra store (and its now-dead
      // address computation) must be erased explicitly.
      auto *PtrOperand = getLoadStorePointerOperand(ExtraElement);
      ExtraElement->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
  }

  ExtraElements.clear();
}
