LLVM: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Source File
1//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges loads/stores to/from sequential memory addresses into
10// vector loads/stores. Although there's nothing GPU-specific in here, this
11// pass is motivated by the microarchitectural quirks of nVidia and AMD GPUs.
12//
13// (For simplicity below we talk about loads only, but everything also applies
14// to stores.)
15//
16// This pass is intended to be run late in the pipeline, after other
17// vectorization opportunities have been exploited. The assumption is that
18// immediately following our new vector loads we'll need to extract out the
19// individual elements of the load, so we can operate on them individually.
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <numeric>
112#include <optional>
113#include <tuple>
114#include <type_traits>
115#include <utility>
116#include <vector>
117
118using namespace llvm;
119
120#define DEBUG_TYPE "load-store-vectorizer"
121
122STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
123STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
124
125namespace {
126
127// Equivalence class key, the initial tuple by which we group loads/stores.
128// Loads/stores with different EqClassKeys are never merged.
129//
130// (We could in theory remove element-size from this tuple; we'd just need to
131// fix up the vector packing/unpacking code.)
132using EqClassKey =
133 std::tuple<const Value * /* result of getUnderlyingObject() */,
134 unsigned /* AddrSpace */,
135 unsigned /* Load/Store element size bits */,
136 char /* IsLoad; char b/c bool can't be a DenseMap key */
137 >;
138[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
139 const EqClassKey &K) {
140 const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
141 return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
142 << " of element size " << ElementSize << " bits in addrspace "
143 << AddrSpace;
144}
145
146// A Chain is a list of loads or stores, each paired with its byte offset from
147// the first ("leader") instruction in the chain. Chains are built per
148// equivalence class and become vectorization candidates once we know their
149// members access adjacent memory.
157struct ChainElem {
158 Instruction *Inst;
159 APInt OffsetFromLeader;
160 ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161 : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
162};
163using Chain = SmallVector<ChainElem, 1>;
164
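// A worked example (illustrative only, not from the original file): for IR
// such as
//   %x = load i32, ptr %p
//   %y = load i32, ptr %q      ; where %q is %p + 4 bytes
// the chain is {(%x, 0), (%y, 4)}; %x is the leader, and each
// OffsetFromLeader is a signed byte distance from it.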
165void sortChainInBBOrder(Chain &C) {
166 sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
167}
168
169void sortChainInOffsetOrder(Chain &C) {
170 sort(C, [](const auto &A, const auto &B) {
171 if (A.OffsetFromLeader != B.OffsetFromLeader)
172 return A.OffsetFromLeader.slt(B.OffsetFromLeader);
173 return A.Inst->comesBefore(B.Inst);
174 });
175}
176
177[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
178 for (const auto &E : C) {
179 dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
180 }
181}
182
183using EquivalenceClassMap =
184 MapVector<EqClassKey, SmallVector<Instruction *, 8>>;
186// FIXME: Assuming stack alignment of 4 is always good enough
187constexpr unsigned StackAdjustedAlignment = 4;
188
189Instruction *propagateMetadata(Instruction *I, const Chain &C) {
190 SmallVector<Value *, 8> Values;
191 for (const ChainElem &E : C)
192 Values.emplace_back(E.Inst);
193 return propagateMetadata(I, Values);
194}
195
196bool isInvariantLoad(const Instruction *I) {
197 const LoadInst *LI = dyn_cast<LoadInst>(I);
198 return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
199}
200
201/// Reorders the instructions that I depends on (the instructions defining its
202/// operands), to ensure they dominate I.
203void reorder(Instruction *I) {
204 SmallPtrSet<Instruction *, 16> InstructionsToMove;
205 SmallVector<Instruction *, 16> Worklist;
206
207 Worklist.emplace_back(I);
208 while (!Worklist.empty()) {
209 Instruction *IW = Worklist.pop_back_val();
210 int NumOperands = IW->getNumOperands();
211 for (int Idx = 0; Idx < NumOperands; Idx++) {
212 Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
213 if (!IM || IM->getOpcode() == Instruction::PHI)
214 continue;
215
216 // If IM is in another BB, no need to move it, because this pass only
217 // vectorizes instructions within one BB.
218 if (IM->getParent() != I->getParent())
219 continue;
220
221 assert(IM != I && "Unexpected cycle while re-ordering instructions");
222
223 if (!IM->comesBefore(I)) {
224 InstructionsToMove.insert(IM);
225 Worklist.emplace_back(IM);
226 }
227 }
228 }
229
230 // All instructions to move should follow I. Start from I, not from begin().
231 for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
232 Instruction *IM = &*(BBI++);
233 if (!InstructionsToMove.contains(IM))
234 continue;
235 IM->moveBefore(I->getIterator());
236 }
237}
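// A sketch of why reorder() is needed (hypothetical IR, not from this file):
// when a chain is vectorized, the single vector load is emitted at the
// position of the first scalar load, so an address computation (e.g. a gep)
// that originally sat between the first and last scalar loads may now come
// *after* its new user; it and its transitive operands must be hoisted above
// the vector load for the IR to stay well-formed.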
238
239class Vectorizer {
240 Function &F;
241 AliasAnalysis &AA;
242 AssumptionCache &AC;
243 DominatorTree &DT;
244 ScalarEvolution &SE;
245 TargetTransformInfo &TTI;
246 const DataLayout &DL;
247 IRBuilder<> Builder;
248
249 // We could erase instrs right after vectorizing them, but that can mess up
250 // our BB iterators, and also can make the equivalence class keys point to
251 // freed memory. This is fixable, but it's simpler just to wait until we're
252 // done with the BB and erase all at once.
253 SmallVector<Instruction *, 128> ToErase;
254
255public:
256 Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
257 DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
258 : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
259 DL(F.getDataLayout()), Builder(SE.getContext()) {}
260
261 bool run();
262
263private:
264 static const unsigned MaxDepth = 3;
265
266 // Runs the vectorizer on a "pseudo basic block", which is a range of
267 // instructions [Begin, End) within one BB all of which have
268 // isGuaranteedToTransferExecutionToSuccessor(I) == true.
269 bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);
270
271 // Runs the vectorizer on one equivalence class, i.e. one set of loads/stores
272 // with the same key as defined above.
273 bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
274 ArrayRef<Instruction *> EqClass);
275
276 // Runs the vectorizer on one chain, i.e. a subset of an equivalence class
277 // whose elements are at known constant offsets from a common leader
278 // instruction.
279 bool runOnChain(Chain &C);
280
281 // Splits the chain into subchains of instructions which read/write a
282 // contiguous block of memory. Discards any length-1 subchains (because
283 // there's nothing to vectorize in there).
284 std::vector<Chain> splitChainByContiguity(Chain &C);
285
286 // Splits the chain into subchains where it's safe to hoist loads up to the
287 // beginning of the sub-chain and it's safe to sink loads up to the end of
288 // the sub-chain. Discards any length-1 subchains.
289 std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);
290
291 // Splits the chain into subchains that make legal, aligned accesses.
292 // Discards any length-1 subchains.
293 std::vector<Chain> splitChainByAlignment(Chain &C);
294
295 // Converts the instrs in the chain into a single vectorized load or store,
296 // queuing the originals for erasure. Assumes the chain passed the splits.
297 bool vectorizeChain(Chain &C);
298
299 // Tries to compute the offset in bytes PtrB - PtrA.
300 std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
301 Instruction *ContextInst,
302 unsigned Depth = 0);
303 std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
304 Instruction *ContextInst,
305 unsigned Depth);
306 std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
307 Instruction *ContextInst,
308 unsigned Depth);
309
310 // Gets the element type of the vector that the chain will load or store;
311 // nontrivial because a chain may legally mix same-width types such as i32
312 // and float.
313 Type *getChainElemTy(const Chain &C);
314
315 // Determines whether ChainElem can be moved up (if IsLoad) or down
316 // (if !IsLoad) to ChainBegin -- i.e. whether, in the range between the two
317 // (in BB order for loads, reverse order for stores), there is any
318 // may-alias instruction that isn't itself part of the chain.
319 //
320 // ChainOffsets maps each chain member to its offset from the chain leader.
321
322 template <bool IsLoadChain>
323 bool isSafeToMove(
324 Instruction *ChainElem, Instruction *ChainBegin,
325 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
326 BatchAAResults &BatchAA);
327
328
329 // Merges equivalence classes whose underlying objects differ by one level
330 // of indirection (getUnderlyingObject of one is the other's object).
331 void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;
332
333
334 // Collects loads and stores grouped by "equivalence class", where:
335 // - all elements in an eq class are a load or all are a store,
336 // - they all load/store the same element size, and
337 // - they all have the same value for getUnderlyingObject().
338 EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
339 BasicBlock::iterator End);
340
341 // Partitions Instrs into "chains" where every instruction has a known
342 // constant offset from the first instr in the chain.
343 //
344 // Postcondition: For all i, ret[i][0].second == 0, because the first instr
345 // in each chain is its leader.
346 std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);
347};
348
349class LoadStoreVectorizerLegacyPass : public FunctionPass {
350public:
351 static char ID;
352
353 LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
354 initializeLoadStoreVectorizerLegacyPassPass(
355 *PassRegistry::getPassRegistry());
356 }
357
358 bool runOnFunction(Function &F) override;
359
360 StringRef getPassName() const override {
361 return "GPU Load and Store Vectorizer";
362 }
363
364 void getAnalysisUsage(AnalysisUsage &AU) const override {
365 AU.addRequired<AAResultsWrapperPass>();
366 AU.addRequired<AssumptionCacheTracker>();
367 AU.addRequired<ScalarEvolutionWrapperPass>();
368 AU.addRequired<DominatorTreeWrapperPass>();
369 AU.addRequired<TargetTransformInfoWrapperPass>();
370 AU.setPreservesCFG();
371 }
372};
373
374}
375
376char LoadStoreVectorizerLegacyPass::ID = 0;
377
379 "Vectorize load and Store instructions", false, false)
387 "Vectorize load and store instructions", false, false)
388
389Pass *llvm::createLoadStoreVectorizerPass() {
390 return new LoadStoreVectorizerLegacyPass();
391}
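// Usage note (standard LLVM workflow, not stated in this file): with the new
// pass manager this pass is typically invoked as
// `opt -passes=load-store-vectorizer`, while createLoadStoreVectorizerPass()
// serves legacy pass-manager clients.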
392
393bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
394 // Don't vectorize when the attribute NoImplicitFloat is used.
395 if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
396 return false;
397
398 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
399 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
400 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
401 TargetTransformInfo &TTI =
402 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
403
404 AssumptionCache &AC =
405 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
406
407 return Vectorizer(F, AA, AC, DT, SE, TTI).run();
408}
409
410PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
411 FunctionAnalysisManager &AM) {
412 // Don't vectorize when the attribute NoImplicitFloat is used.
413 if (F.hasFnAttribute(Attribute::NoImplicitFloat))
414 return PreservedAnalyses::all();
415
416 AliasAnalysis &AA = AM.getResult<AAManager>(F);
417 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
418 ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
419 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
420 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
421
422 bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
423 PreservedAnalyses PA;
424 PA.preserveSet<CFGAnalyses>();
425 return Changed ? PA : PreservedAnalyses::all();
426}
427
428bool Vectorizer::run() {
429 bool Changed = false;
430
431 // Break up the BB if there are any instrs which aren't guaranteed to
432 // transfer execution to their successor.
433 //
434 // Consider, for example:
435 //
436 // def assert_arr_len(int n) { if (n < 2) exit(); }
437 //
438 // load arr[0]
439 // call assert_array_len(arr.length)
440 // load arr[1]
441 //
442 // Even though assert_arr_len does not read or write any memory, we can't
443 // speculate the second load before the call.
444 for (BasicBlock *BB : post_order(&F)) {
445 // BB must at least have a terminator.
446 assert(!BB->empty());
447
448 SmallVector<BasicBlock::iterator, 8> Barriers;
449 Barriers.emplace_back(BB->begin());
450 for (Instruction &I : *BB)
451 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
452 Barriers.emplace_back(I.getIterator());
453 Barriers.emplace_back(BB->end());
454
455 for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
456 ++It)
457 Changed |= runOnPseudoBB(*It, *std::next(It));
458
459 for (Instruction *I : ToErase) {
460 auto *PtrOperand = getLoadStorePointerOperand(I);
461 if (I->use_empty())
462 I->eraseFromParent();
463 RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
464 }
465 ToErase.clear();
466 }
467
468 return Changed;
469}
470
471bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
472 BasicBlock::iterator End) {
473 LLVM_DEBUG({
474 dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
475 if (End != Begin->getParent()->end())
476 dbgs() << *End;
477 else
478 dbgs() << "<BB end>";
479 dbgs() << ")\n";
480 });
481
482 bool Changed = false;
483 for (const auto &[EqClassKey, EqClass] :
484 collectEquivalenceClasses(Begin, End))
485 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
486
487 return Changed;
488}
489
490bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
491 ArrayRef<Instruction *> EqClass) {
492 bool Changed = false;
493
494 LLVM_DEBUG({
495 dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
496 << " keyed on " << EqClassKey << ":\n";
497 for (Instruction *I : EqClass)
498 dbgs() << " " << *I << "\n";
499 });
500
501 std::vector<Chain> Chains = gatherChains(EqClass);
502 LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
503 << " nontrivial chains.\n";);
504 for (Chain &C : Chains)
505 Changed |= runOnChain(C);
506 return Changed;
507}
508
509bool Vectorizer::runOnChain(Chain &C) {
510 LLVM_DEBUG({
511 dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
512 dumpChain(C);
513 });
514
515 // Split up the chain into increasingly smaller chains, until we can
516 // finally vectorize the chains.
517 //
518 // (Don't be scared by the depth of the loop nest here. These operations
519 // are all at worst O(n lg n) in the number of instructions, and splitting
520 // chains doesn't change the number of instrs. So the whole loop nest is
521 // O(n lg n).)
521 bool Changed = false;
522 for (auto &C : splitChainByMayAliasInstrs(C))
523 for (auto &C : splitChainByContiguity(C))
524 for (auto &C : splitChainByAlignment(C))
525 Changed |= vectorizeChain(C);
526 return Changed;
527}
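// To make the nest above concrete (an illustrative sketch, not from the
// original file): a chain of loads at offsets {0, 4, 8, 100} is first split
// on intervening may-alias instructions; splitChainByContiguity then yields
// {0, 4, 8} and {100}; the singleton is dropped, and splitChainByAlignment
// keeps only candidate prefixes the target can access legally, e.g. an
// 8-byte piece {0, 4} if 12-byte accesses are unsupported.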
528
529std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
530 if (C.empty())
531 return {};
532
533 sortChainInBBOrder(C);
534
535 LLVM_DEBUG({
536 dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
537 dumpChain(C);
538 });
539
540 // Map each chain member to its offset from the leader, so the alias
541 // checks below can distinguish chain members from intervening
542 // instructions.
543 DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
544 for (const ChainElem &E : C)
545 ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
546
547 // The IR is not modified while this function runs, so a batched alias
548 // analysis is safe to use and avoids repeated queries.
549 BatchAAResults BatchAA(AA);
550
551 // Loads get hoisted up to the first load in the chain. Stores get sunk
552 // down to the last store in the chain. Our algorithm for loads is:
553 //
554 // - Take the first element of the chain. This is the start of a new chain.
555 //
556 // - Take the next element of `Chain` and check for may-alias instructions
557 // up to the start of NewChain. If no may-alias instrs, add it to
558 // NewChain. Otherwise, start a new NewChain.
559 //
560 // For stores it's the same except in the reverse direction.
561 // We expect IsLoad to be an std::bool_constant.
562 auto Impl = [&](auto IsLoad) {
563 // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
564 auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
565 if constexpr (IsLoad())
566 return std::make_pair(C.begin(), C.end());
567 else
568 return std::make_pair(C.rbegin(), C.rend());
569 }(IsLoad);
570 assert(ChainBegin != ChainEnd);
571
572 std::vector<Chain> Chains;
573 SmallVector<ChainElem, 1> NewChain;
574 NewChain.emplace_back(*ChainBegin);
575 for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
576 if (isSafeToMove<IsLoad()>(ChainIt->Inst, NewChain.front().Inst,
577 ChainOffsets, BatchAA)) {
578 LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
579 << *ChainIt->Inst << " into " << *ChainBegin->Inst
580 << "\n");
581 NewChain.emplace_back(*ChainIt);
582 } else {
583 LLVM_DEBUG(
584 dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
585 << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
586 if (NewChain.size() > 1) {
587 LLVM_DEBUG({
588 dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
589 dumpChain(NewChain);
590 });
591 Chains.emplace_back(std::move(NewChain));
592 }
593
594 // Start a new chain.
595 NewChain = SmallVector<ChainElem, 1>({*ChainIt});
596 }
597 }
598 if (NewChain.size() > 1) {
599 LLVM_DEBUG({
600 dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
601 dumpChain(NewChain);
602 });
603 Chains.emplace_back(std::move(NewChain));
604 }
605 return Chains;
606 };
607
608 if (isa<LoadInst>(C[0].Inst))
609 return Impl(/*IsLoad=*/std::bool_constant<true>());
610
611 assert(isa<StoreInst>(C[0].Inst));
612 return Impl(/*IsLoad=*/std::bool_constant<false>());
613}
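// Example of the split performed above (hypothetical IR): in
//   %x = load i32, ptr %p
//   store i32 0, ptr %unrelated   ; may-alias %p according to AA
//   %y = load i32, ptr %p1        ; %p + 4
// the store blocks hoisting %y up to %x, so {(%x,0)} and {(%y,4)} end up in
// separate chains, and being length-1 each, neither gets vectorized.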
614
615std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
616 if (C.empty())
617 return {};
618
619 sortChainInOffsetOrder(C);
620
622 dbgs() << "LSV: splitChainByContiguity considering chain:\n";
623 dumpChain(C);
624 });
625
626 std::vector<Chain> Ret;
627 Ret.push_back({C.front()});
628
629 unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
630 APInt PrevReadEnd = C[0].OffsetFromLeader +
631 DL.getTypeStoreSize(getLoadStoreType(C[0].Inst));
632 for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
633 auto &CurChain = Ret.back();
634 unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));
635
636
637 assert(
638 8 * SzBytes % ChainElemTyBits == 0 &&
639 "Every chain-element size must be a multiple of the element size after "
640 "vectorization.");
641 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
642
643 bool AreContiguous = false;
644 if (It->OffsetFromLeader.sle(PrevReadEnd)) {
645 // An overlap is OK if it covers a whole number of vector elements.
646 uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
647 if (8 * Overlap % ChainElemTyBits == 0)
648 AreContiguous = true;
649 }
650
651 LLVM_DEBUG(dbgs() << "LSV: Instruction is "
652 << (AreContiguous ? "contiguous" : "chain-breaker")
653 << *It->Inst << " (starts at offset "
654 << It->OffsetFromLeader << ")\n");
655
656 if (AreContiguous)
657 CurChain.push_back(*It);
658 else
659 Ret.push_back({*It});
660 PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
661 }
662
663 // Filter out length-1 chains, these are uninteresting.
664 llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
665 return Ret;
666}
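// Contiguity arithmetic, worked through (illustrative): with i32 elements
// (ChainElemTyBits == 32), a member starting at offset 8 is contiguous when
// PrevReadEnd == 8 (gap of 0) and also when PrevReadEnd == 12 (overlap of 4
// bytes == one whole element), but a member at offset 10 is a chain-breaker,
// since the 2-byte overlap is not a multiple of the 4-byte element.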
667
668Type *Vectorizer::getChainElemTy(const Chain &C) {
669 // The rules are:
670 // - If there are any pointer types in the chain, use an integer type.
671 // - Prefer an integer type if it appears in the chain.
672 // - Otherwise, use the first type in the chain.
673 //
674 // The rule about pointer types is a simplification when we merge e.g. a
675 // load of a ptr and a double. There's no direct conversion from a ptr to
676 // a double; it requires a ptrtoint followed by a bitcast.
677 //
678 // It's unclear to me why the other rules have the shape they do.
679
680
681 if (any_of(C, [](const ChainElem &E) {
682 return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
683 })) {
684 return Type::getIntNTy(
685 F.getContext(),
686 DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
687 }
688
689 for (const ChainElem &E : C)
690 if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
691 return T;
692 return getLoadStoreType(C[0].Inst)->getScalarType();
693}
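// For instance (illustrative): a chain mixing an i32 load and a float load
// keeps i32 as the element type, while a chain containing a ptr load in a
// 64-bit address space is vectorized with i64 elements, with
// bitcast/inttoptr-style fix-ups applied when scalar values are extracted
// (see CreateBitOrPointerCast in vectorizeChain below).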
694
695std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
696 // We use a simple greedy algorithm: for each index CBegin into the chain,
697 // collect the candidate subchains [CBegin, CEnd] that fit in a vector
698 // register, then, starting from the longest candidate, accept the first
699 // one the target can access legally and quickly at the alignment we have
700 // (or can prove). Accepted subchains become vectorization units;
701 // everything else is retried from the next leader.
702 //
703 // This is not optimal, but in practice chains are short and the greedy
704 // choice is good enough.
705 if (C.empty())
706 return {};
707
708 sortChainInOffsetOrder(C);
709
710 LLVM_DEBUG({
711 dbgs() << "LSV: splitChainByAlignment considering chain:\n";
712 dumpChain(C);
713 });
714
715 bool IsLoadChain = isa<LoadInst>(C[0].Inst);
716 auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
717 unsigned ChainSizeBytes, VectorType *VecTy) {
718 return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
719 ChainSizeBytes, VecTy)
720 : TTI.getStoreVectorFactor(VF, LoadStoreSize,
721 ChainSizeBytes, VecTy);
722 };
723
724#ifndef NDEBUG
728 "Should have filtered out non-power-of-two elements in "
729 "collectEquivalenceClasses.");
730 }
731#endif
732
733 unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
734 unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;
735
736 std::vector<Chain> Ret;
737 for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
738
739
740 SmallVector<std::pair<unsigned , unsigned >, 8>
741 CandidateChains;
742
743
744 unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
745 APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
746 for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
747 APInt ReadEnd = C[CEnd].OffsetFromLeader +
748 DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
749 unsigned BytesAdded =
750 PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
751 Sz += BytesAdded;
752 if (Sz > VecRegBytes)
753 break;
754 CandidateChains.emplace_back(CEnd, Sz);
755 PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
756 }
757
758 // Consider candidate chains longest-first; accept the first that works.
759 for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
760 It != End; ++It) {
761 auto [CEnd, SizeBytes] = *It;
762 LLVM_DEBUG(
763 dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
764 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");
765
766 Type *VecElemTy = getChainElemTy(C);
767
768
769
770 unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);
771
772
773 assert((8 * SizeBytes) % VecElemBits == 0);
774 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
775 auto *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
776 unsigned VF = 8 * VecRegBytes / VecElemBits;
777
778
779 unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
780 VecElemBits * NumVecElems / 8, VecTy);
781 if (TargetVF != VF && TargetVF < NumVecElems) {
782 LLVM_DEBUG(
783 dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
784 "because TargetVF="
785 << TargetVF << " != VF=" << VF
786 << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
787 continue;
788 }
789
790
791 // Is a load/store with this alignment allowed by TTI and at least as
792 // fast as an unvectorized load/store?
793 //
794 auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
795 &F = F](Align Alignment) {
796 if (Alignment.value() % SizeBytes == 0)
797 return true;
798 unsigned VectorizedSpeed = 0;
799 bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
800 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
801 if (!AllowsMisaligned) {
802 LLVM_DEBUG(dbgs()
803 << "LSV: Access of " << SizeBytes << "B in addrspace "
804 << AS << " with alignment " << Alignment.value()
805 << " is misaligned, and therefore can't be vectorized.\n");
806 return false;
807 }
808
809 unsigned ElementwiseSpeed = 0;
810 (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
811 Alignment, &ElementwiseSpeed);
812 if (VectorizedSpeed < ElementwiseSpeed) {
813 LLVM_DEBUG(dbgs()
814 << "LSV: Access of " << SizeBytes << "B in addrspace "
815 << AS << " with alignment " << Alignment.value()
816 << " has relative speed " << VectorizedSpeed
817 << ", which is lower than the elementwise speed of "
818 << ElementwiseSpeed
819 << ". Therefore this access won't be vectorized.\n");
820 return false;
821 }
822 return true;
823 };
824
825 // If we're loading/storing from an alloca, align it if possible.
826 //
827 // FIXME: We eagerly upgrade the alignment, regardless of whether TTI
828 // tells us this is beneficial. This feels a bit odd, but it matches
829 // existing tests. This isn't *so* bad, because at most we align to 4
830 // bytes (current value of StackAdjustedAlignment).
831 //
832 // FIXME: We will upgrade the alignment of the alloca even if it turns
833 // out we can't vectorize for some other reason.
834 Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
835 bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
836 isa<AllocaInst>(PtrOperand->stripPointerCasts());
837 Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
838 Align PrefAlign = Align(StackAdjustedAlignment);
839 if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
840 IsAllowedAndFast(PrefAlign)) {
841 Align NewAlign = getOrEnforceKnownAlignment(
842 PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
843 if (NewAlign >= Alignment) {
844 LLVM_DEBUG(dbgs()
845 << "LSV: splitByChain upgrading alloca alignment from "
846 << Alignment.value() << " to " << NewAlign.value()
847 << "\n");
848 Alignment = NewAlign;
849 }
850 }
851
852 if (!IsAllowedAndFast(Alignment)) {
853 LLVM_DEBUG(
854 dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
855 "because its alignment is not AllowedAndFast: "
856 << Alignment.value() << "\n");
857 continue;
858 }
859
860 if ((IsLoadChain &&
861 !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
862 (!IsLoadChain &&
863 !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
864 LLVM_DEBUG(
865 dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
866 "because !isLegalToVectorizeLoad/StoreChain.");
867 continue;
868 }
869
870 // Hooray, we can vectorize this chain!
871 Chain &NewChain = Ret.emplace_back();
872 for (unsigned I = CBegin; I <= CEnd; ++I)
873 NewChain.emplace_back(C[I]);
874 CBegin = CEnd;
875 break;
876 }
877 }
878 return Ret;
879}
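// Worked example for the alignment split (illustrative, with a made-up
// target): for a 16-byte candidate chain whose known alignment is only 4,
// IsAllowedAndFast asks TTI whether a misaligned 16-byte access in this
// addrspace is both allowed and at least as fast as elementwise accesses;
// if not, the loop above retries the shorter 8- and 4-byte candidates before
// giving up on this CBegin.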
880
881bool Vectorizer::vectorizeChain(Chain &C) {
882 if (C.size() < 2)
883 return false;
884
885 sortChainInOffsetOrder(C);
886
887 LLVM_DEBUG({
888 dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
889 dumpChain(C);
890 });
891
892 Type *VecElemTy = getChainElemTy(C);
893 bool IsLoadChain = isa<LoadInst>(C[0].Inst);
894
895 unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst));
896 APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
897 unsigned ChainBytes = BytesAdded;
898 for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
899 unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));
900 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
901
902 BytesAdded =
903 PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
904 ChainBytes += BytesAdded;
905 PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
906 }
907
908 assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
909
910
911 unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
912 Type *VecTy = FixedVectorType::get(VecElemTy, NumElem);
913 Align Alignment = getLoadStoreAlignment(C[0].Inst);
914 unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
915
916 // If this is an alloca access, try to upgrade the alloca's alignment.
917 if (AS == DL.getAllocaAddrSpace()) {
918 Alignment = std::max(
919 Alignment,
920 getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
921 MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
922 }
923
924 // All elements of the chain must have the same scalar-type size.
925#ifndef NDEBUG
926 for (const ChainElem &E : C)
927 assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
928 DL.getTypeStoreSize(VecElemTy));
929#endif
930
931 Instruction *VecInst;
932 if (IsLoadChain) {
933 // Loads get hoisted to the location of the first load in the chain. We
934 // may also need to hoist the (transitive) operands of the loads.
935 Builder.SetInsertPoint(
936 llvm::min_element(C, [](const auto &A, const auto &B) {
937 return A.Inst->comesBefore(B.Inst);
938 })->Inst);
939
940 // A chain of length 1 gets a scalar, not a <1 x Ty>, load.
941 if (NumElem == 1)
942 VecTy = VecElemTy;
943
944
945 VecInst = Builder.CreateAlignedLoad(
946 VecTy, getLoadStorePointerOperand(C[0].Inst),
947 Alignment);
948
949 for (const ChainElem &E : C) {
950 Instruction *I = E.Inst;
951 Value *V;
952 Type *T = getLoadStoreType(I);
953 unsigned EOffset =
954 (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
955 unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
956 if (T == VecTy) {
957 V = VecInst;
958 } else if (auto *VT = dyn_cast<FixedVectorType>(T)) {
959 V = Builder.CreateShuffleVector(
960 VecInst, llvm::to_vector<8>(llvm::seq<int>(
961 VecIdx, VecIdx + VT->getNumElements())));
962 } else {
963 V = Builder.CreateExtractElement(VecInst,
964 Builder.getInt32(VecIdx));
965 }
966 if (V->getType() != I->getType())
967 V = Builder.CreateBitOrPointerCast(V, I->getType());
968 I->replaceAllUsesWith(V);
969 }
970
971 // Finally, we need to reorder the instrs in the BB so that the
972 // (transitive) operands of VecInst appear before it. To see why, suppose
973 // we have vectorized the following code:
974 //
975 // ptr1 = gep a, 1
976 // load1 = load i32 ptr1
977 // ptr0 = gep a, 0
978 // load0 = load i32 ptr0
979 //
980 // We will put the vectorized load at the location of the earliest load
981 // in the BB, i.e. load1. We get:
982 //
983 // ptr1 = gep a, 1
984 // loadv = load <2 x i32> ptr0
985 // load0 = extractelement loadv, 0
986 // load1 = extractelement loadv, 1
987 // ptr0 = gep a, 0
988 //
989 // Notice that loadv uses ptr0, which is defined *after* it!
990 reorder(VecInst);
991 } else {
992 // Stores get sunk to the location of the last store in the chain.
993 Builder.SetInsertPoint(
994 llvm::max_element(C, [](const auto &A, const auto &B) {
994 return A.Inst->comesBefore(B.Inst);
995 })->Inst);
996
997 // Build the vector to store.
998 Value *Vec = PoisonValue::get(VecTy);
999 auto InsertElem = [&](Value *V, unsigned VecIdx) {
1000 if (V->getType() != VecElemTy)
1001 V = Builder.CreateBitOrPointerCast(V, VecElemTy);
1002 Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
1003 };
1004 for (const ChainElem &E : C) {
1005 auto *I = cast<StoreInst>(E.Inst);
1006 unsigned EOffset =
1007 (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
1008 unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
1009 if (FixedVectorType *VT =
1010 dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
1011 for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
1012 InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
1013 Builder.getInt32(J)),
1014 VecIdx++);
1015 }
1016 } else {
1017 InsertElem(I->getValueOperand(), VecIdx);
1018 }
1019 }
1020
1021 // Chain is in offset order, so C[0] is the instr with the lowest
1022 // offset, i.e. the root of the vector.
1023 VecInst = Builder.CreateAlignedStore(
1024 Vec,
1025 getLoadStorePointerOperand(C[0].Inst),
1026 Alignment);
1027 }
1028
1029 propagateMetadata(VecInst, C);
1030
1031 for (const ChainElem &E : C)
1032 ToErase.emplace_back(E.Inst);
1033
1034 ++NumVectorInstructions;
1035 NumScalarsVectorized += C.size();
1036 return true;
1037}
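// Net effect (illustrative before/after, not from the file):
//   %x = load i32, ptr %p
//   %y = load i32, ptr %p1        ; %p + 4
// becomes
//   %v = load <2 x i32>, ptr %p, align 8
//   %x = extractelement <2 x i32> %v, i32 0
//   %y = extractelement <2 x i32> %v, i32 1
// after which the original scalar loads are queued on ToErase.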
1038
1039template <bool IsLoadChain>
1040bool Vectorizer::isSafeToMove(
1041 Instruction *ChainElem, Instruction *ChainBegin,
1042 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
1043 BatchAAResults &BatchAA) {
1044 LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
1045 << *ChainBegin << ")\n");
1046
1048 if (ChainElem == ChainBegin)
1049 return true;
1050
1051 // Invariant loads can always be reordered; by definition they are not
1052 // clobbered by stores.
1053 if (isInvariantLoad(ChainElem))
1054 return true;
1055
1056 auto BBIt = std::next([&] {
1057 if constexpr (IsLoadChain)
1058 return ChainBegin->getIterator();
1059 else
1060 return ChainElem->getIterator();
1061 }());
1062 auto BBItEnd = std::next([&] {
1063 if constexpr (IsLoadChain)
1064 return ChainElem->getIterator();
1065 else
1066 return ChainBegin->getIterator();
1067 }());
1068
1069 const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
1070 const unsigned ChainElemSize =
1071 DL.getTypeStoreSize(getLoadStoreType(ChainElem));
1072
1073 for (; BBIt != BBItEnd; ++BBIt) {
1074 Instruction *I = &*BBIt;
1075
1076 if (!I->mayReadOrWriteMemory())
1077 continue;
1078
1079 // Loads can be reordered with other loads.
1080 if (IsLoadChain && isa<LoadInst>(I))
1081 continue;
1082
1083 // Stores can be reordered with invariant loads.
1084 if (!IsLoadChain && isInvariantLoad(I))
1085 continue;
1086
1087 // If I is in the chain, we can tell whether it aliases ChainElem by
1088 // checking what offset it accesses; this may be more precise than AA.
1089 //
1090 // We should really only have duplicate offsets for stores (duplicate
1091 // loads should be CSE'd), but if we do see a duplicate load, splitting
1092 // the chain is the conservative answer.
1093 if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
1094 // I and ChainElem overlap if:
1095 // - they have the same offset, OR
1096 // - I's offset is less than ChainElem's, but I touches past the
1097 // beginning of ChainElem, OR
1098 // - ChainElem's offset is less than I's, but ChainElem touches past
1099 // the beginning of I.
1100 const APInt &IOffset = OffsetIt->second;
1101 unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
1102 if (IOffset == ChainElemOffset ||
1103 (IOffset.sle(ChainElemOffset) &&
1104 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1105 (ChainElemOffset.sle(IOffset) &&
1106 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1107 LLVM_DEBUG({
1108 // Double check that AA also sees this alias. If not, we probably
1109 // have a bug.
1110 ModRefInfo MR =
1111 BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
1112 assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
1113 dbgs() << "LSV: Found alias in chain: " << *I << "\n";
1114 });
1115 return false;
1116 }
1117
1118 continue;
1119 }
1120
1121 LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
1122 ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
1123 if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
1124 LLVM_DEBUG(dbgs() << "LSV: Found alias in BB:\n"
1125 << " Aliasing instruction:\n"
1126 << " " << *I << '\n'
1127 << " Aliased instruction and pointer:\n"
1128 << " " << *ChainElem << '\n'
1129 << " " << *getLoadStorePointerOperand(ChainElem)
1130 << '\n');
1131
1132 return false;
1133 }
1134 }
1135 return true;
1136}
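// Example (hypothetical): hoisting a load at offset 8 up to its chain leader
// scans the instructions in between; a store that is itself in ChainOffsets
// at offset 4 with size 4 does not overlap [8, 12) and is skipped without an
// AA query, while an unrelated call that may write memory triggers the
// BatchAA check above and, if it mods the location, splits the chain.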
1137
1138static bool checkNoWrapFlags(Instruction *I, bool Signed) {
1139 BinaryOperator *BinOpI = cast<BinaryOperator>(I);
1140 return (Signed && BinOpI->hasNoSignedWrap()) ||
1141 (!Signed && BinOpI->hasNoUnsignedWrap());
1142}
1143
1144static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
1145 unsigned MatchingOpIdxA, Instruction *AddOpB,
1146 unsigned MatchingOpIdxB, bool Signed) {
1147 LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1148 << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
1149 << MatchingOpIdxA << ", AddOpB=" << *AddOpB
1150 << ", MatchingOpIdxB=" << MatchingOpIdxB
1151 << ", Signed=" << Signed << "\n");
1152
1153 // If both AddOpA and AddOpB are adds with NSW/NUW and one of their
1154 // operands is the same, the transformation is safe if we can prove the
1155 // other operands differ by exactly IdxDiff. For example:
1156 //
1157 // %tmp7 = add nsw i32 %tmp2, %v0
1158 // %tmp8 = sext i32 %tmp7 to i64
1159 // ...
1160 // %tmp11 = add nsw i32 %v0, 1
1161 // %tmp12 = add nsw i32 %tmp2, %tmp11
1162 // %tmp13 = sext i32 %tmp12 to i64
1163 //
1164 // Here %tmp7 and %tmp12 differ by 1 (== IdxDiff), so GEPs indexed by them
1165 // address adjacent elements.
1166 if (AddOpA->getOpcode() == Instruction::Add &&
1167 AddOpB->getOpcode() == Instruction::Add &&
1168 checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed)) {
1169 if (AddOpA->getOperand(MatchingOpIdxA) ==
1170 AddOpB->getOperand(MatchingOpIdxB)) {
1171 Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1172 Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1173 Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
1174 Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
1175 // Match `OtherOperandA + IdxDiff` == `OtherOperandB`.
1176 if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
1177 checkNoWrapFlags(OtherInstrB, Signed) &&
1178 isa<ConstantInt>(OtherInstrB->getOperand(1))) {
1179 int64_t CstVal =
1180 cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
1181 if (OtherInstrB->getOperand(0) == OtherOperandA &&
1182 IdxDiff.getSExtValue() == CstVal)
1183 return true;
1184 }
1185
1186 if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
1187 checkNoWrapFlags(OtherInstrA, Signed) &&
1188 isa<ConstantInt>(OtherInstrA->getOperand(1))) {
1189 int64_t CstVal =
1190 cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
1191 if (OtherInstrA->getOperand(0) == OtherOperandB &&
1192 IdxDiff.getSExtValue() == -CstVal)
1193 return true;
1194 }
1195
1196 // Match `A = X + CstA` vs `B = X + CstB` where IdxDiff == CstB - CstA.
1197 if (OtherInstrA && OtherInstrB &&
1198 OtherInstrA->getOpcode() == Instruction::Add &&
1199 OtherInstrB->getOpcode() == Instruction::Add &&
1200 checkNoWrapFlags(OtherInstrA, Signed) &&
1201 checkNoWrapFlags(OtherInstrB, Signed) &&
1202 isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
1203 isa<ConstantInt>(OtherInstrB->getOperand(1))) {
1204 int64_t CstValA =
1205 cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
1206 int64_t CstValB =
1207 cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
1208 if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
1209 IdxDiff.getSExtValue() == (CstValB - CstValA))
1210 return true;
1211 }
1212 }
1213 return false;
1214}
1215
1216std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1217 Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
1218 LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1219 << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
1220 << " Depth=" << Depth << "\n");
1221 auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
1222 auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
1223 if (!GEPA || !GEPB)
1224 return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);
1225
1226 // Look through GEPs after checking they're the same except for the last
1227 // index.
1228 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1229 GEPA->getPointerOperand() != GEPB->getPointerOperand())
1230 return std::nullopt;
1231 gep_type_iterator GTIA = gep_type_begin(GEPA);
1232 gep_type_iterator GTIB = gep_type_begin(GEPB);
1233 for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
1234 if (GTIA.getOperand() != GTIB.getOperand())
1235 return std::nullopt;
1236 ++GTIA;
1237 ++GTIB;
1238 }
1239
1240 Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
1241 Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
1242 if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
1243 OpA->getType() != OpB->getType())
1244 return std::nullopt;
1245
1246 uint64_t Stride = GTIA.getSequentialElementStride(DL);
1247
1248 // Only look through a ZExt/SExt.
1249 if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
1250 return std::nullopt;
1251
1252 bool Signed = isa<SExtInst>(OpA);
1253
1254 // At this point A could be a function parameter, i.e. not an instruction.
1255 Value *ValA = OpA->getOperand(0);
1256 OpB = dyn_cast<Instruction>(OpB->getOperand(0));
1257 if (!OpB || ValA->getType() != OpB->getType())
1258 return std::nullopt;
1259
1260 const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
1261 const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
1262 const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1263 if (IdxDiffSCEV == SE.getCouldNotCompute())
1264 return std::nullopt;
1265
1266 ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
1267 if (!IdxDiffRange.isSingleElement())
1268 return std::nullopt;
1269 APInt IdxDiff = *IdxDiffRange.getSingleElement();
1270
1271 LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1272 << "\n");
1273
1274 // Now we need to prove that adding IdxDiff to ValA won't overflow.
1275 bool Safe = false;
1276
1277 // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added
1278 // to ValA, we're okay.
1279 if (OpB->getOpcode() == Instruction::Add &&
1280 isa<ConstantInt>(OpB->getOperand(1)) &&
1281 IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
1282 checkNoWrapFlags(OpB, Signed))
1283 Safe = true;
1284
1285 // Second attempt: check if we have eligible add NSW/NUW instruction
1286 // sequences.
1287 OpA = dyn_cast<Instruction>(ValA);
1288 if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
1289 OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
1290 checkNoWrapFlags(OpB, Signed)) {
1291
1292 // In the checks below a matching operand in OpA and OpB is an operand
1293 // which is the same in those two instructions.
1294 for (unsigned MatchingOpIdxA : {0, 1})
1295 for (unsigned MatchingOpIdxB : {0, 1})
1296 if (!Safe)
1297 Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
1298 MatchingOpIdxB, Signed);
1299 }
1300
1302
1303
1304
1305
1306
1307
1308
1309
1310 if (!Safe) {
1311 // When computing known bits, use the GEPs as context instructions, since
1312 // they likely are in the same BB as the load/store.
1313 KnownBits Known(ValA->getType()->getScalarSizeInBits());
1314 computeKnownBits(ValA, Known, DL, &AC, ContextInst,
1315 &DT);
1316 APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
1317 if (Signed)
1318 BitsAllowedToBeSet.clearBit(Known.getBitWidth() - 1);
1319 Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
1320 }
1321
1322 if (Safe)
1323 return IdxDiff * Stride;
1324 return std::nullopt;
1325}
1326
1327std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1328 Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
1329 if (Depth++ == MaxDepth)
1330 return std::nullopt;
1331
1332 if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
1333 if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
1334 if (SelectA->getCondition() != SelectB->getCondition())
1335 return std::nullopt;
1336 LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1337 << ", PtrB=" << *PtrB << ", ContextInst="
1338 << *ContextInst << ", Depth=" << Depth << "\n");
1339 std::optional<APInt> TrueDiff = getConstantOffset(
1340 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
1341 if (!TrueDiff)
1342 return std::nullopt;
1343 std::optional<APInt> FalseDiff =
1344 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1345 ContextInst, Depth);
1346 if (TrueDiff == FalseDiff)
1347 return TrueDiff;
1348 }
1349 }
1350 return std::nullopt;
1351}
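// Example for the select case (hypothetical IR): for
//   %pA = select i1 %c, ptr %x, ptr %y
//   %pB = select i1 %c, ptr %x4, ptr %y4   ; %x+4 and %y+4
// both arms differ by 4, so getConstantOffset(%pA, %pB) == 4; had the arms
// disagreed, std::nullopt would be returned.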
1352
1353void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
1354 if (EQClasses.size() < 2)
1355 return;
1356
1357
1358
1359 static_assert(std::tuple_size_v<EqClassKey> == 4,
1360 "EqClassKey has changed - EqClassReducedKey needs changes too");
1361 using EqClassReducedKey =
1362 std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
1363 std::tuple_element_t<2, EqClassKey> /* Element size */,
1364 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
1365 using ECReducedKeyToUnderlyingObjectMap =
1366 MapVector<EqClassReducedKey,
1367 SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
1368
1369
1370
1371
1372 ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1373 bool FoundPotentiallyOptimizableEC = false;
1374 for (const auto &EC : EQClasses) {
1375 const auto &Key = EC.first;
1376 EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
1377 std::get<3>(Key)};
1378 auto &UOMap = RedKeyToUOMap[RedKey];
1379 UOMap.insert(std::get<0>(Key));
1380 if (UOMap.size() > 1)
1381 FoundPotentiallyOptimizableEC = true;
1382 }
1383 if (!FoundPotentiallyOptimizableEC)
1384 return;
1385
1386 LLVM_DEBUG({
1387 dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
1388 for (const auto &EC : EQClasses) {
1389 dbgs() << " Key: {" << EC.first << "}\n";
1390 for (const auto &Inst : EC.second)
1391 dbgs() << " Inst: " << *Inst << '\n';
1392 }
1393 });
1394 LLVM_DEBUG({
1395 dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
1396 for (const auto &RedKeyToUO : RedKeyToUOMap) {
1397 dbgs() << " Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
1398 << std::get<1>(RedKeyToUO.first) << ", "
1399 << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
1400 << RedKeyToUO.second.size() << " underlying objects:\n";
1401 for (auto UObject : RedKeyToUO.second)
1402 dbgs() << " " << *UObject << '\n';
1403 }
1404 });
1405
1406 using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1407
1408 // Compute the ultimate targets for a set of underlying objects.
1409 auto GetUltimateTargets =
1410 [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1411 UObjectToUObjectMap IndirectionMap;
1412 for (const auto *UObject : UObjects) {
1413 const unsigned MaxLookupDepth = 1;
1414 const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
1415 if (UltimateTarget != UObject)
1416 IndirectionMap[UObject] = UltimateTarget;
1417 }
1418 UObjectToUObjectMap UltimateTargetsMap;
1419 for (const auto *UObject : UObjects) {
1420 auto Target = UObject;
1421 auto It = IndirectionMap.find(Target);
1422 for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
1423 Target = It->second;
1424 UltimateTargetsMap[UObject] = Target;
1425 }
1426 return UltimateTargetsMap;
1427 };
1428
1429 // For each item in RedKeyToUOMap, if it has more than one underlying
1430 // object, try to merge the equivalence classes.
1431 for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
1432 if (UObjects.size() < 2)
1433 continue;
1434 auto UTMap = GetUltimateTargets(UObjects);
1435 for (const auto &[UObject, UltimateTarget] : UTMap) {
1436 if (UObject == UltimateTarget)
1437 continue;
1438
1439 EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
1440 std::get<2>(RedKey)};
1441 EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
1442 std::get<2>(RedKey)};
1443
1444
1445 const auto &VecTo = EQClasses[KeyTo];
1446 const auto &VecFrom = EQClasses[KeyFrom];
1447 SmallVector<Instruction *, 8> MergedVec;
1448 std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
1449 std::back_inserter(MergedVec),
1450 [](Instruction *A, Instruction *B) {
1451 return A && B && A->comesBefore(B);
1452 });
1453 EQClasses[KeyTo] = std::move(MergedVec);
1454 EQClasses.erase(KeyFrom);
1455 }
1456 }
1457 LLVM_DEBUG({
1458 dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
1459 for (const auto &EC : EQClasses) {
1460 dbgs() << " Key: {" << EC.first << "}\n";
1461 for (const auto &Inst : EC.second)
1462 dbgs() << " Inst: " << *Inst << '\n';
1463 }
1464 });
1465}
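// Merging example (illustrative): if one class is keyed on an underlying
// object %q == getUnderlyingObject(%p, 1) and another on %p itself, the two
// reduced keys match, so the %p class is folded into the %q class and the
// instructions are re-sorted into BB order by the std::merge above.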
1466
1467EquivalenceClassMap
1468Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
1469 BasicBlock::iterator End) {
1470 EquivalenceClassMap Ret;
1471
1472 auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
1473 const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
1474 if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
1475 // The select itself is a distinct instruction even if it shares the
1476 // same condition and evaluates to consecutive pointers for the true
1477 // and false values of the condition, so using the select for grouping
1478 // would put consecutive accesses into different lists and they
1479 // wouldn't even be checked for being consecutive. Group on the
1480 // condition instead.
1481 return Sel->getCondition();
1482 }
1483 return ObjPtr;
1484 };
1485
1486 for (Instruction &I : make_range(Begin, End)) {
1487 auto *LI = dyn_cast<LoadInst>(&I);
1488 auto *SI = dyn_cast<StoreInst>(&I);
1489 if (!LI && !SI)
1490 continue;
1491
1492 if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
1493 continue;
1494
1495 if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
1496 (SI && !TTI.isLegalToVectorizeStore(SI)))
1497 continue;
1498
1499 Type *Ty = getLoadStoreType(&I);
1500 if (!VectorType::isValidElementType(Ty->getScalarType()))
1501 continue;
1502
1503 // Skip weird non-byte sizes. They probably aren't worth the effort of
1504 // handling correctly.
1505 unsigned TySize = DL.getTypeSizeInBits(Ty);
1506 if ((TySize % 8) != 0)
1507 continue;
1508
1509 // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
1510 // functions are currently using an integer type for the vectorized
1511 // load/store, and no vector type is of pointer size.
1512
1513 if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
1514 continue;
1515
1516 Value *Ptr = getLoadStorePointerOperand(&I);
1517 unsigned AS = Ptr->getType()->getPointerAddressSpace();
1518 unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
1519
1520 unsigned VF = VecRegSize / TySize;
1521 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1522
1523 // Only handle power-of-two sized elements.
1524 if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
1525 (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
1526 continue;
1527
1528 // No point in looking at these if they're too big to vectorize.
1529 if (TySize > VecRegSize / 2 ||
1530 (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
1531 continue;
1532
1533 Ret[{GetUnderlyingObject(Ptr), AS,
1534 DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
1535 LI != nullptr}]
1536 .emplace_back(&I);
1537 }
1538
1539 mergeEquivalenceClasses(Ret);
1540 return Ret;
1541}
1542
1542
1543std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
1544 if (Instrs.empty())
1545 return {};
1546
1547 unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
1548 unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
1549
1550#ifndef NDEBUG
1551 // Check that Instrs is in BB order and all have the same addr space.
1552 for (size_t I = 1; I < Instrs.size(); ++I) {
1553 assert(Instrs[I - 1]->comesBefore(Instrs[I]));
1554 assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
1555 }
1556#endif
1557
1558 // Machinery to build an MRU-hashtable of Chains.
1559 //
1560 // (Ideally this could be done with MapVector, but MapVector doesn't
1561 // support reordering its elements.)
1562 struct InstrListElem : ilist_node,
1563 std::pair<Instruction *, Chain> {
1564 explicit InstrListElem(Instruction *I)
1565 : std::pair<Instruction *, Chain>(I, {}) {}
1566 };
1567 struct InstrListElemDenseMapInfo {
1568 using PtrInfo = DenseMapInfo<InstrListElem *>;
1569 using IInfo = DenseMapInfo<Instruction *>;
1570 static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
1571 static InstrListElem *getTombstoneKey() {
1572 return PtrInfo::getTombstoneKey();
1573 }
1574 static unsigned getHashValue(const InstrListElem *E) {
1575 return IInfo::getHashValue(E->first);
1576 }
1577 static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
1578 if (A == getEmptyKey() || B == getEmptyKey())
1579 return A == getEmptyKey() && B == getEmptyKey();
1580 if (A == getTombstoneKey() || B == getTombstoneKey())
1581 return A == getTombstoneKey() && B == getTombstoneKey();
1582 return IInfo::isEqual(A->first, B->first);
1583 }
1584 };
1585 SpecificBumpPtrAllocator Allocator;
1586 simple_ilist MRU;
1587 DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
1588
1589 // Compare each instruction in `instrs` to the leader of the N most
1590 // recently-used chains. This limits the O(n^2) behavior of this pass
1591 // while also allowing us to build arbitrarily long chains.
1592 for (Instruction *I : Instrs) {
1593 constexpr int MaxChainsToTry = 64;
1594
1595 bool MatchFound = false;
1596 auto ChainIter = MRU.begin();
1597 for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
1598 ++J, ++ChainIter) {
1599 if (std::optional<APInt> Offset = getConstantOffset(
1600 getLoadStorePointerOperand(ChainIter->first),
1601 getLoadStorePointerOperand(I),
1602 /*ContextInst=*/
1603 (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
1604 // `Offset` might not have the expected number of bits, if e.g. AS
1605 // has a different number of bits than opaque pointers.
1606 ChainIter->second.emplace_back(I, Offset.value());
1607
1608 MRU.remove(*ChainIter);
1609 MRU.push_front(*ChainIter);
1610 MatchFound = true;
1611 break;
1612 }
1613 }
1614
1615 if (!MatchFound) {
1616 APInt ZeroOffset(ASPtrBits, 0);
1617 InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
1618 E->second.emplace_back(I, ZeroOffset);
1619 MRU.push_front(*E);
1620 Chains.insert(E);
1621 }
1622 }
1623
1624 std::vector<Chain> Ret;
1625 Ret.reserve(Chains.size());
1626 // Iterate over MRU rather than Chains so the order is deterministic.
1627 for (auto &E : MRU)
1628 if (E.second.size() > 1)
1629 Ret.emplace_back(std::move(E.second));
1630 return Ret;
1631}
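// Design note (an inference from the code above, not an original comment):
// matching each instruction against at most MaxChainsToTry == 64
// recently-used chain leaders keeps gatherChains near-linear on pathological
// inputs with many unrelated pointers, at the cost of possibly missing a
// chain that fell out of the MRU window; such a miss only loses an
// optimization opportunity, never correctness.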
1632
1633std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
1634 Instruction *ContextInst,
1635 unsigned Depth) {
1636 LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
1637 << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
1638 << ", Depth=" << Depth << "\n");
1639
1640
1641 unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
1642 APInt OffsetA(OrigBitWidth, 0);
1643 APInt OffsetB(OrigBitWidth, 0);
1644 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1645 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1646 unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
1647 if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
1648 return std::nullopt;
1649
1650 // If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
1651 // should properly handle a possible overflow and the value should fit
1652 // into the smallest data type used in the cast/gep chain.
1653 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1654 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1655
1656 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1657 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1658 if (PtrA == PtrB)
1659 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1660
1661 // Try to compute B - A.
1662 const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
1663 if (DistScev != SE.getCouldNotCompute()) {
1664 LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
1665 ConstantRange DistRange = SE.getSignedRange(DistScev);
1666 if (DistRange.isSingleElement()) {
1667 // Handle index width != pointer width, e.g. when the address space
1668 // uses 64-bit pointers but 32-bit indices.
1669 APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
1670 return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
1671 }
1672 }
1673 if (std::optional<APInt> Diff =
1674 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
1675 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1676 .sextOrTrunc(OrigBitWidth);
1677 return std::nullopt;
1678}
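// Offset resolution example (illustrative): for PtrA == gep i8, %base, 16
// and PtrB == gep i8, %base, 24, the stripAndAccumulate step alone yields
// OffsetB - OffsetA == 8 without consulting SCEV; the SCEV and complex-addrs
// fallbacks above only run when constant-offset stripping leaves distinct
// base pointers.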