LLVM: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp Source File
//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass merges loads/stores to/from sequential memory addresses into
// vector loads/stores.  Although there's nothing GPU-specific in here, this
// pass is motivated by the microarchitectural quirks of nVidia and AMD GPUs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <numeric>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

namespace {

// Equivalence class key: loads and stores are grouped by underlying object,
// address space, element size, and load-vs-store; only accesses within one
// class are considered for merging into a vector access.
using EqClassKey =
    std::tuple<const Value * /*result of getUnderlyingObject()*/,
               unsigned /*AddrSpace*/,
               unsigned /*Load/store element size bits*/,
               char /*IsLoad; char b/c bool can't be a DenseMap key*/
               >;
[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
            << " of element size " << ElementSize << " bits in addrspace "
            << AddrSpace;
}

// A Chain is a list of loads or stores (never both) that access consecutive
// or overlapping memory.  Each element records its instruction together with
// its byte offset from the chain's leader, i.e. the first instruction added
// to the chain.
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;

void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}

void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}

[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}

using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough.
constexpr unsigned StackAdjustedAlignment = 4;

Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}

/// Reorders the instructions that I depends on (the instructions defining its
/// operands), to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move are after I, so scan forward from I.
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // We could erase instrs right after vectorizing them, but that can mess up
  // our BB iterators and make the equivalence-class keys point to freed
  // memory.  Instead, queue instrs for erasure and erase them in batches.
  SmallVector<Instruction *, 128> ToErase;

  // Extra loads/stores created by this pass itself, either to fill gaps
  // inside a chain or to extend a chain to a power-of-two size.  Their vector
  // lanes are masked off in the final masked load/store, and any that end up
  // unvectorized are deleted again by deleteExtraElements().
  DenseSet<Instruction *> ExtraElements;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;

  /// Runs the vectorizer on a "pseudo basic block", i.e. a range of
  /// instructions [Begin, End) within one BB, where every instruction in the
  /// range is guaranteed to transfer execution to the next.
  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);

  /// Runs the vectorizer on one equivalence class: a set of loads, or a set
  /// of stores, with the same underlying object, address space, and element
  /// size.
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);

  /// Runs the vectorizer on one chain: a subset of an equivalence class in
  /// which each element has a known constant offset from the chain's leader.
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains of contiguous memory accesses, possibly
  /// inserting extra "gap fill" elements where the target supports masked
  /// loads/stores.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains where it's safe to hoist loads up to the
  /// beginning of the sub-chain and safe to sink stores down to its end.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that can each be vectorized as one
  /// legal, fast vector access.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  /// Converts the chain to a single vectorized load or store, queueing the
  /// original scalar instructions for deletion.
  bool vectorizeChain(Chain &C);

  /// Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  Type *getChainElemTy(const Chain &C);

  /// Determines whether ChainElem can be moved up (if IsLoadChain) or down
  /// (if !IsLoadChain) to ChainBegin -- i.e. there are no intervening
  /// may-alias instructions.
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  /// Merges equivalence classes whose underlying objects differ by one level
  /// of indirection.
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;

  /// Collects loads and stores grouped by "equivalence class", where:
  ///   - all elements in an eq class are loads, or all are stores,
  ///   - they all load/store the same element size (it's OK to have e.g. i8
  ///     and <4 x i8> in the same class, but not i32 and <4 x i8>), and
  ///   - they all have the same value for getUnderlyingObject().
  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);

  /// Partitions Instrs into "chains" where every instruction has a known
  /// constant offset from the first instr in the chain.
  ///
  /// Postcondition: For all i, ret[i][0].second == 0, because the first instr
  /// in the chain is the leader, and an instr touches distance 0 from itself.
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);

  /// Checks if the access is allowed by TTI and, if misaligned, at least as
  /// fast vectorized as performed element-wise.
  bool accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS, Align Alignment,
                              unsigned VecElemBits) const;

  /// Creates an extra (gap-fill or chain-extending) load or store of type Ty,
  /// Offset bytes after PrevElem, and records it in ExtraElements.
  ChainElem createExtraElementAfter(const ChainElem &PrevElem, Type *Ty,
                                    APInt Offset, StringRef Prefix,
                                    Align Alignment = Align());

  /// Builds the lane mask for a masked load/store: true for lanes covered by
  /// real chain elements, false for lanes that cover only extra elements.
  Value *createMaskForExtraElements(const Chain &C, FixedVectorType *VecTy);

  /// Deletes the extra elements, including those that were never vectorized.
  void deleteExtraElements();
};

class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // namespace

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}

bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}

PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();

  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}

bool Vectorizer::run() {
  bool Changed = false;
  // Break up the BB if there are any instrs which aren't guaranteed to
  // transfer execution to their successor.  Such an instruction (e.g. a call
  // that may throw or exit the program) acts as a barrier: even if it doesn't
  // read or write memory, we can't hoist a later load above it or sink an
  // earlier store below it.  So each BB is processed as a sequence of
  // "pseudo-BBs" delimited by those barriers.
  for (BasicBlock *BB : post_order(&F)) {
    // BB must at least have a terminator.
    assert(!BB->empty());

    SmallVector<BasicBlock::iterator, 8> Barriers;
    Barriers.emplace_back(BB->begin());
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        Barriers.emplace_back(I.getIterator());
    Barriers.emplace_back(BB->end());

    for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
         ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    // Erase the instructions queued up while vectorizing, along with any
    // pointer operands that became trivially dead.  Extra (gap-fill or
    // extension) elements are owned by deleteExtraElements() below.
    for (Instruction *I : ToErase) {
      if (ExtraElements.contains(I))
        continue;
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
    deleteExtraElements();
  }

  return Changed;
}

bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);

  return Changed;
}

bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}

bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize them.  (Don't be scared by the depth of the loop nest here:
  // splitting doesn't change the total number of instructions, so the whole
  // nest is roughly O(n lg n) in the size of the chain.)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
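
// Illustrative walk-through (hypothetical example, not from the upstream
// source): given a chain of four i32 loads at byte offsets {0, 4, 16, 20}
// with a may-aliasing store between the second and third load,
// splitChainByMayAliasInstrs first yields {0, 4} and {16, 20}.  Each piece is
// internally contiguous, so splitChainByContiguity keeps it intact, and
// splitChainByAlignment then verifies that a <2 x i32> access is legal and
// fast before vectorizeChain finally emits the two vector loads.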

std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // We know that elements in the chain with nonoverlapping offsets can't
  // alias, but AA may not be smart enough to figure this out.  Use a
  // hashtable to answer that question for chain members.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // Across a single invocation of this function the IR is not changing, so
  // batching the alias queries is safe and saves compile time.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain.  Stores get sunk
  // down to the last store in the chain.  The algorithm (for loads) is:
  //
  //  - Take the first element of the chain; it starts a new chain.
  //  - Take the next element and check for may-alias instructions between it
  //    and the start of the current chain.  If there are none, append it to
  //    the current chain; otherwise it starts a new chain.
  //
  // Stores work the same way, but scanning in reverse.  `IsLoad` is passed as
  // a std::bool_constant so the direction is resolved at compile time.
  auto Impl = [&](auto IsLoad) {
    // Pick the iteration direction based on load vs. store chain.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }

        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(/*IsLoad=*/std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(/*IsLoad=*/std::bool_constant<false>());
}

std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  // Walk the chain in offset order, splitting wherever consecutive elements
  // are neither contiguous nor overlapping by a multiple of the vectorized
  // element size.  If the target supports masked loads/stores, small gaps of
  // one or two elements can instead be plugged with "extra" elements whose
  // lanes will be masked off.
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  Type *ElementType = getLoadStoreType(C[0].Inst)->getScalarType();
  unsigned MaxVecRegBits = TTI.getLoadStoreVecRegBitWidth(AS);
  Align OptimisticAlign = Align(MaxVecRegBits / 8);
  unsigned int MaxVectorNumElems =
      MaxVecRegBits / DL.getTypeSizeInBits(ElementType);

  // Check gap-filling legality once, optimistically assuming the widest
  // vector type and the best alignment we could end up with.
  FixedVectorType *OptimisticVectorType =
      FixedVectorType::get(ElementType, MaxVectorNumElems);
  bool TryFillGaps =
      isa<LoadInst>(C[0].Inst)
          ? TTI.isLegalMaskedLoad(OptimisticVectorType, OptimisticAlign, AS)
          : TTI.isLegalMaskedStore(OptimisticVectorType, OptimisticAlign, AS);

  // Find the best-aligned element of the chain; extra elements derive their
  // alignment from it.
  Align BestAlignedElemAlign = getLoadStoreAlignment(C[0].Inst);
  APInt OffsetOfBestAlignedElemFromLeader = C[0].OffsetFromLeader;
  for (const ChainElem &E : C) {
    Align ElementAlignment = getLoadStoreAlignment(E.Inst);
    if (ElementAlignment > BestAlignedElemAlign) {
      BestAlignedElemAlign = ElementAlignment;
      OffsetOfBestAlignedElemFromLeader = E.OffsetFromLeader;
    }
  }

  auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
    return commonAlignment(
        BestAlignedElemAlign,
        (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
            .abs()
            .getLimitedValue());
  };

  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
  ChainElem &Prev = C[0];
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    auto &CurChain = Ret.back();
    // `Prev` accesses [Prev.OffsetFromLeader, PrevReadEnd).
    APInt PrevSzBytes =
        APInt(ASPtrBits, DL.getTypeStoreSize(getLoadStoreType(Prev.Inst)));
    APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));

    assert(8 * SzBytes % ChainElemTyBits == 0 &&
           "Every chain-element size must be a multiple of the element size "
           "after vectorization.");
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    bool AreContiguous = false;
    if (It->OffsetFromLeader.sle(PrevReadEnd)) {
      // The elements touch or overlap; that's OK if the overlap is a multiple
      // of the vectorized element size.
      uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
      if (8 * Overlap % ChainElemTyBits == 0)
        AreContiguous = true;
    }

    LLVM_DEBUG(dbgs() << "LSV: Instruction is "
                      << (AreContiguous ? "contiguous" : "chain-breaker")
                      << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");

    // If the elements are not contiguous, try to plug the gap with extra
    // (masked-off) elements.  Only gaps of one or two elements of the same
    // size as the previous element are handled: a one-element gap is always
    // worth filling, and a two-element gap is filled only when the fills land
    // inside the same 4-lane group of the vector.
    bool GapFilled = false;
    if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
      APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
      if (GapSzBytes == PrevSzBytes) {
        // One-element gap.
        ChainElem NewElem = createExtraElementAfter(
            Prev, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        CurChain.push_back(NewElem);
        GapFilled = true;
      }
      // Two-element gap, with the current chain holding 4k+1 elements.
      if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
        ChainElem NewElem1 = createExtraElementAfter(
            Prev, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        ChainElem NewElem2 = createExtraElementAfter(
            NewElem1, getLoadStoreType(Prev.Inst), PrevSzBytes, "GapFill",
            DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
        CurChain.push_back(NewElem1);
        CurChain.push_back(NewElem2);
        GapFilled = true;
      }
    }

    if (AreContiguous || GapFilled)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});

    // Keep `Prev` pointing at the element whose access extends furthest.
    if (ReadEnd.sge(PrevReadEnd))
      Prev = *It;
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
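
// Gap-filling example (hypothetical): i32 elements at offsets {0, 4, 12}
// leave a one-element hole at offset 8.  If the target supports masked
// accesses of <4 x i32> (TryFillGaps), a dummy "GapFill" element is created
// at offset 8, producing the chain {0, 4, 8, 12} that can later become a
// single masked <4 x i32> access; otherwise the chain splits into {0, 4} and
// the singleton {12}, which is then discarded as uninteresting.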

Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules:
  //  - If there are any pointer types in the chain, use an integer type of
  //    the same width: there's no direct conversion between pointers and
  //    floats, and mixed chains would otherwise need ptrtoint+bitcast pairs.
  //  - Otherwise, prefer the first integer type that appears in the chain;
  //    everything can be bitcast to an integer of equal width.
  //  - Failing that, use the scalar type of the first element.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}

std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // We use a simple greedy algorithm.
  //  - Given a chain of length N, find all prefixes that
  //    (a) are not longer than the max register length, and
  //    (b) are a power of 2 elements long.
  //  - Starting from the longest prefix, try to create a vector of that
  //    length.
  //  - If one of them works, great: repeat the algorithm on any remaining
  //    elements in the chain.
  //  - If none of them work, discard the first element and repeat on a chain
  //    of length N-1.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  // Candidate chains that contain extra (gap-fill) elements can only be
  // vectorized as masked loads/stores, which is checked per candidate below.
  bool CandidateChainsMayContainExtraLoadsStores = any_of(
      C, [this](const ChainElem &E) { return ExtraElements.contains(E.Inst); });

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;

    unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
    APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt ReadEnd = C[CEnd].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
      unsigned BytesAdded =
          PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
      Sz += BytesAdded;
      if (Sz > VecRegBytes)
        break;
      CandidateChains.emplace_back(CEnd, Sz);
      PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
    }

    // Consider the longest chain first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.
      // For example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this
      // case VecElemTy is i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // Is a load/store with this alignment allowed by TTI and at least as
      // fast as an unvectorized load/store?
      //
      // If we're loading/storing from an alloca, we might be able to upgrade
      // the alloca's alignment to the preferred stack alignment first.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      Chain ExtendingLoadsStores;
      if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
        // The access as-is isn't allowed or fast.  Try to extend the chain
        // with extra (masked-off) elements up to the next power-of-two
        // element count, which may turn it into a legal masked access.
        bool AllowedAndFast = false;
        if (NumVecElems < TargetVF && !isPowerOf2_32(NumVecElems) &&
            VecElemBits >= 8) {
          // VecElemBits >= 8 keeps NewSizeBytes byte-exact.
          assert(VecElemBits % 8 == 0);
          unsigned VecElemBytes = VecElemBits / 8;
          unsigned NewNumVecElems = PowerOf2Ceil(NumVecElems);
          unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;

          assert(isPowerOf2_32(TargetVF) &&
                 "TargetVF expected to be a power of 2");
          assert(NewNumVecElems <= TargetVF &&
                 "Should not extend past TargetVF");

          LLVM_DEBUG(dbgs()
                     << "LSV: attempting to extend chain of " << NumVecElems
                     << " " << (IsLoadChain ? "loads" : "stores") << " to "
                     << NewNumVecElems << " elements\n");
          FixedVectorType *NewVecTy =
              FixedVectorType::get(VecElemTy, NewNumVecElems);
          bool IsLegalToExtend =
              IsLoadChain ? TTI.isLegalMaskedLoad(NewVecTy, Alignment, AS)
                          : TTI.isLegalMaskedStore(NewVecTy, Alignment, AS);

          // Extending is worthwhile only if the resulting power-of-two-sized
          // access is itself allowed and fast.
          if (IsLegalToExtend &&
              accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
                                     VecElemBits)) {
            LLVM_DEBUG(dbgs()
                       << "LSV: extending " << (IsLoadChain ? "load" : "store")
                       << " chain of " << NumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << SizeBytes << " to "
                       << NewNumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << NewSizeBytes
                       << ", TargetVF=" << TargetVF << " \n");

            // Create the extra (masked-off) loads/stores that pad the chain
            // out to a power-of-two number of elements.
            unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
            for (unsigned I = 0; I < (NewNumVecElems - NumVecElems); I++) {
              ChainElem NewElem = createExtraElementAfter(
                  C[CBegin], VecElemTy,
                  APInt(ASPtrBits, SizeBytes + I * VecElemBytes), "Extend");
              ExtendingLoadsStores.push_back(NewElem);
            }

            SizeBytes = NewSizeBytes;
            NumVecElems = NewNumVecElems;
            AllowedAndFast = true;
          }
        }
        if (!AllowedAndFast) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because its alignment is not AllowedAndFast: "
              << Alignment.value() << "\n");
          continue;
        }
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }

      if (CandidateChainsMayContainExtraLoadsStores) {
        // If this candidate contains any extra loads/stores, their lanes must
        // be masked off in the final vector access, so the candidate is only
        // viable if the target supports the corresponding masked operation.
        bool CurrCandContainsExtraLoadsStores = llvm::any_of(
            ArrayRef<ChainElem>(C).slice(CBegin, CEnd - CBegin + 1),
            [this](const ChainElem &E) {
              return ExtraElements.contains(E.Inst);
            });

        if (CurrCandContainsExtraLoadsStores &&
            !(IsLoadChain
                  ? TTI.isLegalMaskedLoad(
                        FixedVectorType::get(VecElemTy, NumVecElems),
                        Alignment, AS)
                  : TTI.isLegalMaskedStore(
                        FixedVectorType::get(VecElemTy, NumVecElems),
                        Alignment, AS))) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because it contains extra loads/stores that we cannot "
                 "legally vectorize into a masked load/store \n");
          continue;
        }
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      for (ChainElem E : ExtendingLoadsStores)
        NewChain.emplace_back(E);
      CBegin = CEnd;
      break;
    }
  }
  return Ret;
}
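
// Illustrative numbers (hypothetical): with 128-bit vector registers and a
// chain of four i32 accesses, VecRegBytes = 16 and VF = 4.  If the chain is
// only 4-byte aligned and TTI reports misaligned 16-byte accesses as slow
// (and the chain can't be extended into a legal masked access), the
// four-element candidate is rejected and the loop falls back to shorter
// candidates from the same CandidateChains list, e.g. two <2 x i32> accesses.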

bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  bool ChainContainsExtraLoadsStores = llvm::any_of(
      C, [this](const ChainElem &E) { return ExtraElements.contains(E.Inst); });

  // A two-element chain with an extra element would be a masked access
  // covering only one real element; that's never profitable.
  if (C.size() == 2 && ChainContainsExtraLoadsStores)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst));
  APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
  unsigned ChainBytes = BytesAdded;
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst));
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;
    // Elements may overlap; count each byte only once.
    BytesAdded =
        PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
    ChainBytes += BytesAdded;
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>).
  unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
  Type *VecTy = FixedVectorType::get(VecElemTy, NumElem);

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier; get the most up-to-date alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain.  We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // If the chain contains extra (gap-fill/extension) elements, emit a
    // masked load that disables their lanes; otherwise a plain vector load.
    if (ChainContainsExtraLoadsStores) {
      Value *Mask = createMaskForExtraElements(C, cast<FixedVectorType>(VecTy));
      VecInst = Builder.CreateMaskedLoad(
          VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment, Mask);
    } else {
      // Chain is in offset order, so C[0] is the instr with the lowest
      // offset, i.e. the root of the vector.
      if (NumElem == 1)
        VecTy = VecElemTy;

      VecInst = Builder.CreateAlignedLoad(
          VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment);
    }

    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      // Extra loads have no users to replace; skip them.
      if (ExtraElements.contains(I))
        continue;
      Value *V;
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (NumElem == 1) {
        V = VecInst;
      } else if (auto *VT = dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        V = Builder.CreateShuffleVector(
            VecInst, llvm::to_vector<8>(llvm::seq<int>(
                         VecIdx, VecIdx + VT->getNumElements())));
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx));
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Finally, we need to reorder the instrs in the BB so that the
    // (transitive) operands of VecInst appear before it.  To see why, suppose
    // we have vectorized the following code:
    //
    //   ptr1  = gep a, 1
    //   load1 = load i32 ptr1
    //   ptr0  = gep a, 0
    //   load0 = load i32 ptr0
    //
    // We will put the vectorized load at the location of the earliest load in
    // the BB, i.e. load1.  We get:
    //
    //   ptr1  = gep a, 1
    //   loadv = load <2 x i32> ptr0
    //   load0 = extractelement loadv, 0
    //   load1 = extractelement loadv, 1
    //
    // Notice that loadv uses ptr0, which is defined *after* it!
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.  Lanes written only by extra (gap-fill)
    // stores end up poison; they are masked off below.
    Value *Vec = PoisonValue::get(VecTy);
    auto InsertElem = [&](Value *V, unsigned VecIdx) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)),
                     VecIdx++);
        }
      } else {
        InsertElem(I->getValueOperand(), VecIdx);
      }
    }

    // If the chain contains extra stores, emit a masked store that disables
    // their lanes; otherwise a plain vector store.
    if (ChainContainsExtraLoadsStores) {
      Value *Mask = createMaskForExtraElements(C, cast<FixedVectorType>(VecTy));
      VecInst = Builder.CreateMaskedStore(
          Vec, getLoadStorePointerOperand(C[0].Inst), Alignment, Mask);
    } else {
      VecInst = Builder.CreateAlignedStore(
          Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
    }
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
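
// For illustration, a load chain roughly transforms as follows (sketch,
// assuming the target accepts the <2 x i32> access):
//
//   %l0 = load i32, ptr %p          ; offset 0
//   %l1 = load i32, ptr %p.plus.4   ; offset 4
//
// becomes
//
//   %vec = load <2 x i32>, ptr %p, align 8
//   %l0  = extractelement <2 x i32> %vec, i32 0
//   %l1  = extractelement <2 x i32> %vec, i32 1
//
// with the scalar loads queued on ToErase.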

template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return ChainElem->getIterator();
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return ChainBegin->getIterator();
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be sunk below invariant loads.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking whether they access overlapping offsets, which is cheaper and
    // more precise than an AA query.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - I and ChainElem have the same offset, OR
      //   - I's offset is smaller, but I touches past the beginning of
      //     ChainElem, OR
      //   - ChainElem's offset is smaller, but ChainElem touches past the
      //     beginning of I.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          // Double check that AA also sees this alias.  If not, we probably
          // have a bug.
          ModRefInfo MR =
              BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
          assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }

      continue; // We're confident there's no alias.
    }

    LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in BB:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n'
                        << "    " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');

      return false;
    }
  }
  return true;
}

/// Returns true if I (an Add) has the nsw (if Signed) or nuw (if !Signed)
/// flag.
static bool checkNoWrapFlags(Instruction *I, bool Signed) {
  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
  return (Signed && BinOpI->hasNoSignedWrap()) ||
         (!Signed && BinOpI->hasNoUnsignedWrap());
}

/// Returns true if matching `add nsw/nuw` sequences prove that AddOpA and
/// AddOpB differ by exactly IdxDiff without overflow.
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");

  // If both OpA and OpB are adds with NSW/NUW and with one of the operands
  // being the same, we can guarantee that the transformation is safe if we
  // can prove that OpA won't overflow when IdxDiff is added to the other
  // operand of OpA.  For example:
  //
  //  %tmp7  = add nsw i32 %tmp2, %v0
  //  %tmp8  = sext i32 %tmp7 to i64
  //  ...
  //  %tmp11 = add nsw i32 %v0, 1
  //  %tmp12 = add nsw i32 %tmp2, %tmp11
  //  %tmp13 = sext i32 %tmp12 to i64
  //
  // Both %tmp7 and %tmp12 have the nsw flag and the first operand is %tmp2.
  // It's guaranteed that adding 1 to %tmp7 won't overflow because %tmp11 adds
  // 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c)` and
    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}

std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");
  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW and OpB is IdxDiff added to
  // ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check if we have eligible add NSW/NUW instruction
  // sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    // In the checks below, a "matching operand" is an operand that is the
    // same in both add instructions.
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt:
  //
  // Assuming IdxDiff is positive: if all set bits of IdxDiff or any higher
  // order bit other than the sign bit are known to be zero in ValA, we can
  // add IdxDiff to it while guaranteeing no overflow of any sort.
  //
  // If IdxDiff is negative, do the same, but swap ValA and ValB.
  if (!Safe) {
    // When computing known bits, use the GEPs as context instructions, since
    // they likely are in the same BB as the load/store.
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, &AC, ContextInst,
                     &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}

std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
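
// E.g. (hypothetical IR): for
//   %a = select i1 %c, ptr %p, ptr %q
//   %b = select i1 %c, ptr %p4, ptr %q4   ; %p4 == %p+4, %q4 == %q+4
// the true arms differ by 4 bytes and the false arms differ by 4 bytes, so
// %b - %a is the constant 4 regardless of %c.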

void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2) // There is nothing to merge.
    return;

  // The reduced key is the full EqClassKey minus the underlying object; two
  // classes are merge candidates when they agree on the reduced key.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

  // Form a map from each reduced key to the set of underlying objects that
  // occur with it; bail early if no reduced key is shared by more than one
  // underlying object.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });

  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate target for each underlying object: follow one level
  // of getUnderlyingObject() indirection at a time until a fixed point.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // look for 1-level indirections only
      const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      auto Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };

  // For each reduced key with multiple underlying objects, merge each class
  // into the class of its ultimate target, keeping instructions in BB order.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
                       std::get<2>(RedKey)};

      // The entry for KeyFrom is guaranteed to exist; operator[] creates the
      // entry for KeyTo if it doesn't exist yet.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}

EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // The select's themselves are distinct instructions even if they share
      // the same condition and evaluate to consecutive pointers for true and
      // false values of the condition.  Therefore using the select's
      // themselves for grouping instructions would put consecutive accesses
      // into different lists and they won't be even checked for being
      // consecutive, and won't be vectorized.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
        (SI && !TTI.isLegalToVectorizeStore(SI)))
      continue;

    Type *Ty = getLoadStoreType(&I);
    if (!VectorType::isValidElementType(Ty->getScalarType()))
      continue;

    // Skip weird non-byte sizes.  They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    // Skip vectors of pointers.  The vectorizeLoadChain/vectorizeStoreChain
    // functions are currently using an integer type for the vectorized
    // load/store, and does not support casting between the integer type and a
    // vector of pointers (e.g. i64 to <2 x i16*>).
    if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy &&
         !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}

std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif

  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but MapVector doesn't support
  // reordering its elements.)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains.  This bounds the quadratic behavior of this pass
  // while still allowing arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        // Match found: add I to this chain.
        ChainIter->second.emplace_back(I, Offset.value());
        // Move the matched chain to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
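
// Sketch of the MRU behavior (hypothetical): for instructions I1..I4 where
// I1/I3 have a constant offset from one leader and I2/I4 from another, I1
// starts chain A, I2 starts chain B, I3 matches A (moving A to the front of
// the MRU list), and I4 matches B.  Each new instruction is compared against
// the leaders of at most MaxChainsToTry = 64 chains, which bounds the
// quadratic worst case.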

std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
  // should've returned a value with the original bit width.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute B - A via SCEV.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width (the width of Dist) != pointer width (the width
      // of the Offset*s at this point).
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
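
// E.g. (hypothetical): PtrA = gep inbounds i8, ptr %base, i64 8 and
// PtrB = gep inbounds i8, ptr %base, i64 24 both strip to %base with
// accumulated offsets 8 and 24, so the early PtrA == PtrB case returns
// 24 - 8 = 16 without consulting SCEV at all.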

bool Vectorizer::accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS,
                                        Align Alignment,
                                        unsigned VecElemBits) const {
  // An access whose alignment is a multiple of its size is always OK.
  if (Alignment.value() % SizeBytes == 0)
    return true;

  // Otherwise, ask TTI whether the misaligned vectorized access is allowed...
  unsigned VectorizedSpeed = 0;
  bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
      F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
  if (!AllowsMisaligned) {
    LLVM_DEBUG(
        dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace " << AS
               << " with alignment " << Alignment.value()
               << " is misaligned, and therefore can't be vectorized.\n");
    return false;
  }

  // ...and whether it's at least as fast as the equivalent element-wise
  // accesses.
  unsigned ElementwiseSpeed = 0;
  TTI.allowsMisalignedMemoryAccesses(F.getContext(), VecElemBits, AS,
                                     Alignment, &ElementwiseSpeed);
  if (VectorizedSpeed < ElementwiseSpeed) {
    LLVM_DEBUG(dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
                      << AS << " with alignment " << Alignment.value()
                      << " has relative speed " << VectorizedSpeed
                      << ", which is lower than the elementwise speed of "
                      << ElementwiseSpeed
                      << ".  Therefore this access won't be vectorized.\n");
    return false;
  }
  return true;
}

ChainElem Vectorizer::createExtraElementAfter(const ChainElem &Prev, Type *Ty,
                                              APInt Offset, StringRef Prefix,
                                              Align Alignment) {
  Instruction *NewElement = nullptr;
  Builder.SetInsertPoint(Prev.Inst->getNextNode());
  if (auto *PrevLoad = dyn_cast<LoadInst>(Prev.Inst)) {
    Value *NewGep = Builder.CreatePtrAdd(
        PrevLoad->getPointerOperand(), Builder.getInt(Offset), Prefix + "GEP");
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    NewElement = Builder.CreateAlignedLoad(Ty, NewGep, Alignment, Prefix);
  } else {
    // For a store chain, store poison; the lane is masked off when the chain
    // is vectorized, so the value is never actually written.
    auto *PrevStore = cast<StoreInst>(Prev.Inst);
    Value *NewGep = Builder.CreatePtrAdd(
        PrevStore->getPointerOperand(), Builder.getInt(Offset), Prefix + "GEP");
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    NewElement =
        Builder.CreateAlignedStore(PoisonValue::get(Ty), NewGep, Alignment);
  }

  // Record the new instruction so that its lane can be masked off during
  // vectorization and so that it can be deleted if it is never vectorized.
  ExtraElements.insert(NewElement);

  APInt NewOffsetFromLeader = Prev.OffsetFromLeader + Offset;
  LLVM_DEBUG(dbgs() << "LSV: Extra Element Created: \n"
                    << *NewElement
                    << " OffsetFromLeader: " << NewOffsetFromLeader << "\n");
  return ChainElem{NewElement, NewOffsetFromLeader};
}
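
// E.g. (hypothetical): filling a 4-byte gap after `%l1 = load i32, ptr %gep1`
// at chain offset 4 emits, right after %l1:
//
//   %GapFillGEP = getelementptr i8, ptr %gep1, i64 4
//   %GapFill    = load i32, ptr %GapFillGEP, align 4
//
// and records %GapFill in ExtraElements so its lane can be masked off (or the
// load deleted) later.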

Value *Vectorizer::createMaskForExtraElements(const Chain &C,
                                              FixedVectorType *VecTy) {
  // Start with an all-false mask.
  SmallVector<Constant *, 8> MaskElts(VecTy->getNumElements(),
                                      Builder.getInt1(false));

  // Enable the lanes covered by real (non-extra) chain elements.
  for (const ChainElem &E : C) {
    if (ExtraElements.contains(E.Inst))
      continue;
    unsigned EOffset =
        (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
    unsigned VecIdx =
        8 * EOffset / DL.getTypeSizeInBits(VecTy->getScalarType());
    if (FixedVectorType *VT =
            dyn_cast<FixedVectorType>(getLoadStoreType(E.Inst)))
      for (unsigned J = 0; J < VT->getNumElements(); ++J)
        MaskElts[VecIdx + J] = Builder.getInt1(true);
    else
      MaskElts[VecIdx] = Builder.getInt1(true);
  }
  return ConstantVector::get(MaskElts);
}
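
// E.g. (hypothetical): for a <4 x i32> chain whose element at lane 2 is a
// gap-fill element, the resulting mask is <i1 1, i1 1, i1 0, i1 1>, so the
// masked load/store touches lanes 0, 1, and 3 only.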

void Vectorizer::deleteExtraElements() {
  for (auto *ExtraElement : ExtraElements) {
    if (isa<LoadInst>(ExtraElement)) {
      [[maybe_unused]] bool Deleted =
          RecursivelyDeleteTriviallyDeadInstructions(ExtraElement);
      assert(Deleted && "Extra Load should always be trivially dead");
    } else {
      // An extra store is not trivially dead because it writes memory, so
      // erase it explicitly, then clean up its now-dead pointer operand.
      auto *PtrOperand = getLoadStorePointerOperand(ExtraElement);
      ExtraElement->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
  }

  ExtraElements.clear();
}