LLVM: lib/Transforms/Scalar/SROA.cpp Source File
47#include "llvm/Config/llvm-config.h"
89#include
90#include
91#include
92#include
93#include
94#include
95#include
96#include
97#include
98#include
99#include
100#include
101
102using namespace llvm;
103
104#define DEBUG_TYPE "sroa"
105
106STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
107STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
108STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
109STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
110STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
111STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
112STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
113STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
115 "Number of loads rewritten into predicated loads to allow promotion");
117 NumStoresPredicated,
118 "Number of stores rewritten into predicated loads to allow promotion");
119STATISTIC(NumDeleted, "Number of instructions deleted");
120STATISTIC(NumVectorized, "Number of vectorized aggregates");
121
122namespace llvm {
123
127}
128
129namespace {
130
131class AllocaSliceRewriter;
132class AllocaSlices;
133class Partition;
134
135class SelectHandSpeculativity {
136 unsigned char Storage = 0;
139public:
140 SelectHandSpeculativity() = default;
141 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
142 bool isSpeculatable(bool isTrueVal) const;
143 bool areAllSpeculatable() const;
144 bool areAnySpeculatable() const;
145 bool areNoneSpeculatable() const;
146
147 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
148 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149};
150static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
151
152using PossiblySpeculatableLoad =
153 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
154using UnspeculatableStore = StoreInst *;
155using RewriteableMemOp =
156 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
157using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
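// The SROA pass driver. For each alloca on the worklist it builds the set of
// slices, rewrites each partition of those slices onto a new, smaller alloca,
// and finally promotes the surviving allocas to SSA values where possible.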
177class SROA {
178 LLVMContext *const C;
179 DomTreeUpdater *const DTU;
180 AssumptionCache *const AC;
181 const bool PreserveCFG;
182
183
184
185
186
187
188
189
190 SmallSetVector<AllocaInst *, 16> Worklist;
191
192
193
194
196
197
198
199
200
201
202
203
204
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
206
207
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
210 PromotableAllocas;
211
212
213
214
215
216
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
218
219
220
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
241
242public:
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 SROAOptions PreserveCFG_)
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
247
248
249 std::pair<bool, bool> runSROA(Function &F);
250
251private:
252 friend class AllocaSliceRewriter;
253
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
256 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
257 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
258 std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
259 void clobberUse(Use &U);
260 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
261 bool promoteAllocas();
262};
263
264}
265
266
267
268
269
270
271
272
273
274namespace {
275enum FragCalcResult { UseFrag, UseNoFrag, Skip };
276}
277static FragCalcResult
278calculateFragment(DILocalVariable *Variable,
279 uint64_t NewStorageSliceOffsetInBits,
280 uint64_t NewStorageSliceSizeInBits,
281 std::optional<DIExpression::FragmentInfo> StorageFragment,
282 std::optional<DIExpression::FragmentInfo> CurrentFragment,
283 DIExpression::FragmentInfo &Target) {
284
285
286 if (StorageFragment) {
287 Target.SizeInBits =
288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
289 Target.OffsetInBits =
290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
291 } else {
292 Target.SizeInBits = NewStorageSliceSizeInBits;
293 Target.OffsetInBits = NewStorageSliceOffsetInBits;
294 }
295
296
297
298
299 if (!CurrentFragment) {
300 if (auto Size = Variable->getSizeInBits()) {
301
303 if (Target == CurrentFragment)
304 return UseNoFrag;
305 }
306 }
307
308
309
310 if (!CurrentFragment || *CurrentFragment == Target)
311 return UseFrag;
312
313
314
315
316 if (Target.startInBits() < CurrentFragment->startInBits() ||
317 Target.endInBits() > CurrentFragment->endInBits())
318 return Skip;
319
320
321 return UseFrag;
322}
323
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
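// Migrate debug-info assignment markers (dbg.assign records) from an
// instruction on the original alloca to the new instruction that replaces
// it, recomputing fragment offsets/sizes when the alloca has been split.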
343static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
344 uint64_t OldAllocaOffsetInBits,
345 uint64_t SliceSizeInBits, Instruction *OldInst,
346 Instruction *Inst, Value *Dest, Value *Value,
347 const DataLayout &DL) {
348
349
350
351
353
355
356 if (DVRAssignMarkerRange.empty())
357 return;
358
360 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
361 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
362 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
363 << "\n");
364 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
365 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
370
371
373 BaseFragments;
376 DVR->getExpression()->getFragmentInfo();
377
378
379
385
386 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
387 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
388 << "\n");
389 auto *Expr = DbgAssign->getExpression();
390 bool SetKillLocation = false;
391
392 if (IsSplit) {
393 std::optional<DIExpression::FragmentInfo> BaseFragment;
394 {
396 if (R == BaseFragments.end())
397 return;
398 BaseFragment = R->second;
399 }
400 std::optional<DIExpression::FragmentInfo> CurrentFragment =
401 Expr->getFragmentInfo();
402 DIExpression::FragmentInfo NewFragment;
403 FragCalcResult Result = calculateFragment(
404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
405 BaseFragment, CurrentFragment, NewFragment);
406
407 if (Result == Skip)
408 return;
409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
410 if (CurrentFragment) {
411
412
413
414
415 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
416 }
417
418 if (auto E = DIExpression::createFragmentExpression(
419 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits))
420 Expr = *E;
421 } else {
422
423
424
428 SetKillLocation = true;
429 }
430 }
431 }
432
433
434 if (!NewID) {
436 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
437 }
438
440 if (IsSplit) {
443 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
445 DbgAssign->getDebugLoc())));
446 } else {
447
448 NewAssign = DbgAssign;
449 NewAssign->setAssignId(NewID);
454 }
455
456
457
458
459
460
461
462
463
464
465
466 SetKillLocation |=
467 Value && (DbgAssign->hasArgList() ||
468 !DbgAssign->getExpression()->isSingleLocationExpression());
469 if (SetKillLocation)
470 NewAssign->setKillLocation();
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485 if (NewAssign != DbgAssign) {
486 NewAssign->moveBefore(DbgAssign->getIterator());
487 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
488 }
489 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
490 };
491
492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
493}
494
495namespace {
496
497
498
499class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
500 std::string Prefix;
501
502 Twine getNameWithPrefix(const Twine &Name) const {
503 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
504 }
505
506public:
507 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
508
509 void InsertHelper(Instruction *I, const Twine &Name,
510 BasicBlock::iterator InsertPt) const override {
511 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
512 InsertPt);
513 }
514};
515
516
517using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
518
519
520
521
522
523
524
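// A Slice records one use of the alloca as the byte range
// [BeginOffset, EndOffset) that it touches, plus whether that use can be
// split apart when the alloca is partitioned.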
525class Slice {
526
527 uint64_t BeginOffset = 0;
528
529
530 uint64_t EndOffset = 0;
531
532
533
534 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
535
536public:
537 Slice() = default;
538
539 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
540 Value *ProtectedFieldDisc)
541 : BeginOffset(BeginOffset), EndOffset(EndOffset),
542 UseAndIsSplittable(U, IsSplittable),
543 ProtectedFieldDisc(ProtectedFieldDisc) {}
544
545 uint64_t beginOffset() const { return BeginOffset; }
546 uint64_t endOffset() const { return EndOffset; }
547
548 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
549 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
550
551 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
552
553 bool isDead() const { return getUse() == nullptr; }
554 void kill() { UseAndIsSplittable.setPointer(nullptr); }
555
556
557
558 Value *ProtectedFieldDisc;
559
560
561
562
563
564
565
566 bool operator<(const Slice &RHS) const {
567 if (beginOffset() < RHS.beginOffset())
568 return true;
569 if (beginOffset() > RHS.beginOffset())
570 return false;
571 if (isSplittable() != RHS.isSplittable())
572 return !isSplittable();
573 if (endOffset() > RHS.endOffset())
574 return true;
575 return false;
576 }
577
578
579 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
580 return LHS.beginOffset() < RHSOffset;
581 }
582 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
583 return LHSOffset < RHS.beginOffset();
584 }
585
586 bool operator==(const Slice &RHS) const {
587 return isSplittable() == RHS.isSplittable() &&
588 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
589 }
590 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
591};
592
593
594
595
596
597
598
599
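// AllocaSlices computes and stores the complete set of Slices for a single
// alloca, sorted by begin offset, and exposes iteration over both the
// individual slices and the disjoint partitions they form.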
600class AllocaSlices {
601public:
602
603 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
604
605
606
607
608
609 bool isEscaped() const { return PointerEscapingInstr; }
610 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
611
612
613
614 using iterator = SmallVectorImpl<Slice>::iterator;
615 using range = iterator_range<iterator>;
616
617 iterator begin() { return Slices.begin(); }
618 iterator end() { return Slices.end(); }
619
620 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
621 using const_range = iterator_range<const_iterator>;
622
623 const_iterator begin() const { return Slices.begin(); }
624 const_iterator end() const { return Slices.end(); }
625
626
627
628 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
629
630
631
632
633
634
635 void insert(ArrayRef<Slice> NewSlices) {
636 int OldSize = Slices.size();
637 Slices.append(NewSlices.begin(), NewSlices.end());
638 auto SliceI = Slices.begin() + OldSize;
639 std::stable_sort(SliceI, Slices.end());
640 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
641 }
642
643
644
647
648
650
651
652
654
655
656 ArrayRef<Use *> getDeadUsesIfPromotable() const {
657 return DeadUseIfPromotable;
658 }
659
660
661
662
663
664
665
666 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
667
668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
670 void printSlice(raw_ostream &OS, const_iterator I,
671 StringRef Indent = " ") const;
672 void printUse(raw_ostream &OS, const_iterator I,
673 StringRef Indent = " ") const;
674 void print(raw_ostream &OS) const;
675 void dump(const_iterator I) const;
676 void dump() const;
677#endif
678
679private:
680 template <typename DerivedT, typename RetT = void> class BuilderBase;
681 class SliceBuilder;
682
683 friend class AllocaSlices::SliceBuilder;
684
685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
686
687 AllocaInst &AI;
688#endif
689
690
691
692
693
694
695
696 Instruction *PointerEscapingInstr;
697 Instruction *PointerEscapingInstrReadOnly;
698
699
700
701
702
703
704
705 SmallVector<Slice, 8> Slices;
706
707
708
709
710
711
712
713 SmallVector<Instruction *, 8> DeadUsers;
714
715
716
718
719
721
722
723
724
725
726
727
728
729
731};
732
733
734
735
736
737
738
739
740
741
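// A Partition is a contiguous, non-overlapping byte range of the alloca
// covering one or more slices; partitions are the units that get rewritten
// onto new, smaller allocas.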
742class Partition {
743private:
744 friend class AllocaSlices;
745 friend class AllocaSlices::partition_iterator;
746
747 using iterator = AllocaSlices::iterator;
748
749
750
751 uint64_t BeginOffset = 0, EndOffset = 0;
752
753
754 iterator SI, SJ;
755
756
758
759
760
761 Partition(iterator SI) : SI(SI), SJ(SI) {}
762
763public:
764
765
766
767 uint64_t beginOffset() const { return BeginOffset; }
768
769
770
771
772 uint64_t endOffset() const { return EndOffset; }
773
774
775
776
777 uint64_t size() const {
778 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
779 return EndOffset - BeginOffset;
780 }
781
782
783
784 bool empty() const { return SI == SJ; }
785
786
787
788
789
790
791
792
793
794
795 iterator begin() const { return SI; }
796 iterator end() const { return SJ; }
797
798
799
800
801
802
803
805};
806
807}
808
809
810
811
812
813
814
815
816
817
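// An iterator that walks the sorted slices and materializes successive
// disjoint Partition objects on the fly, carrying along the "split tails"
// of splittable slices that extend past the current partition's end.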
818class AllocaSlices::partition_iterator
819 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
820 Partition> {
822
823
824
825 Partition P;
826
827
828 AllocaSlices::iterator SE;
829
830
831
832 uint64_t MaxSplitSliceEndOffset = 0;
833
834
835
836 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
837 : P(SI), SE(SE) {
838
839
840 if (SI != SE)
841 advance();
842 }
843
844
845
846
847 void advance() {
848 assert((P.SI != SE || !P.SplitTails.empty()) &&
849 "Cannot advance past the end of the slices!");
850
851
852 if (!P.SplitTails.empty()) {
853 if (P.EndOffset >= MaxSplitSliceEndOffset) {
854
855 P.SplitTails.clear();
856 MaxSplitSliceEndOffset = 0;
857 } else {
858
859
860
861 llvm::erase_if(P.SplitTails,
862 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
863 assert(any_of(P.SplitTails,
864 [&](Slice *S) {
865 return S->endOffset() == MaxSplitSliceEndOffset;
866 }) &&
867 "Could not find the current max split slice offset!");
868 assert(all_of(P.SplitTails,
869 [&](Slice *S) {
870 return S->endOffset() <= MaxSplitSliceEndOffset;
871 }) &&
872 "Max split slice end offset is not actually the max!");
873 }
874 }
875
876
877
878 if (P.SI == SE) {
879 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
880 return;
881 }
882
883
884
885 if (P.SI != P.SJ) {
886
887
888 for (Slice &S : P)
889 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
890 P.SplitTails.push_back(&S);
891 MaxSplitSliceEndOffset =
892 std::max(S.endOffset(), MaxSplitSliceEndOffset);
893 }
894
895
896 P.SI = P.SJ;
897
898
899 if (P.SI == SE) {
900 P.BeginOffset = P.EndOffset;
901 P.EndOffset = MaxSplitSliceEndOffset;
902 return;
903 }
904
905
906
907
908 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
909 !P.SI->isSplittable()) {
910 P.BeginOffset = P.EndOffset;
911 P.EndOffset = P.SI->beginOffset();
912 return;
913 }
914 }
915
916
917
918
919
920
921 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
922 P.EndOffset = P.SI->endOffset();
923 ++P.SJ;
924
925
926
927 if (!P.SI->isSplittable()) {
928
929
930 assert(P.BeginOffset == P.SI->beginOffset());
931
932
933
934 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
935 if (!P.SJ->isSplittable())
936 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
937 ++P.SJ;
938 }
939
940
941
942 return;
943 }
944
945
946
947
948 assert(P.SI->isSplittable() && "Forming a splittable partition!");
949
950
951 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
952 P.SJ->isSplittable()) {
953 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
954 ++P.SJ;
955 }
956
957
958
959
960 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
961 assert(!P.SJ->isSplittable());
962 P.EndOffset = P.SJ->beginOffset();
963 }
964 }
965
966public:
967 bool operator==(const partition_iterator &RHS) const {
968 assert(SE == RHS.SE &&
969 "End iterators don't match between compared partition iterators!");
970
971
972
973
974
975
976 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
977 assert(P.SJ == RHS.P.SJ &&
978 "Same set of slices formed two different sized partitions!");
979 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
980 "Same slice position with differently sized non-empty split "
981 "slice tails!");
982 return true;
983 }
984 return false;
985 }
986
987 partition_iterator &operator++() {
988 advance();
989 return *this;
990 }
991
992 Partition &operator*() { return P; }
993};
994
995
996
997
998
999
1000
1001
1003 return make_range(partition_iterator(begin(), end()),
1004 partition_iterator(end(), end()));
1005}
1006
1008
1009
1010
1011 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
1012 return SI.getOperand(1 + CI->isZero());
1013 if (SI.getOperand(1) == SI.getOperand(2))
1014 return SI.getOperand(1);
1015
1016 return nullptr;
1017}
1018
1019
1020static Value *foldPHINodeOrSelectInst(Instruction &I) {
1021 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1022
1023 return PN->hasConstantValue();
1024 }
1025 return foldSelectInst(cast<SelectInst>(I));
1026}
1027
1028
1029
1030
1031
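// SliceBuilder walks all uses of the alloca (via PtrUseVisitor) and records
// a Slice for every load, store, memset, memcpy/memmove and lifetime
// intrinsic that touches it, aborting the analysis when the pointer escapes.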
1035
1037
1039 AllocaSlices &AS;
1040
1043
1044
1046
1047
1048
1049 Value *ProtectedFieldDisc = nullptr;
1050
1051public:
1052 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1053 : PtrUseVisitor<SliceBuilder>(DL),
1054 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1055 AS(AS) {}
1056
1057private:
1058 void markAsDead(Instruction &I) {
1059 if (VisitedDeadInsts.insert(&I).second)
1060 AS.DeadUsers.push_back(&I);
1061 }
1062
1063 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1064 bool IsSplittable = false) {
1065
1066
1067 if (Size == 0 || Offset.uge(AllocSize)) {
1070 << " which has zero size or starts outside of the "
1071 << AllocSize << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 return markAsDead(I);
1075 }
1076
1077 uint64_t BeginOffset = Offset.getZExtValue();
1078 uint64_t EndOffset = BeginOffset + Size;
1079
1080
1081
1082
1083
1084
1085
1086 assert(AllocSize >= BeginOffset);
1087 if (Size > AllocSize - BeginOffset) {
1088 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1089 << Offset << " to remain within the " << AllocSize
1090 << " byte alloca:\n"
1091 << " alloca: " << AS.AI << "\n"
1092 << " use: " << I << "\n");
1093 EndOffset = AllocSize;
1094 }
1095
1096 AS.Slices.push_back(
1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
1098 }
1099
1100 void visitBitCastInst(BitCastInst &BC) {
1101 if (BC.use_empty())
1102 return markAsDead(BC);
1103
1104 return Base::visitBitCastInst(BC);
1105 }
1106
1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1108 if (ASC.use_empty())
1109 return markAsDead(ASC);
1110
1111 return Base::visitAddrSpaceCastInst(ASC);
1112 }
1113
1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1115 if (GEPI.use_empty())
1116 return markAsDead(GEPI);
1117
1118 return Base::visitGetElementPtrInst(GEPI);
1119 }
1120
1121 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1122 uint64_t Size, bool IsVolatile) {
1123
1124
1125
1126 bool IsSplittable =
1127 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1128
1129 insertUse(I, Offset, Size, IsSplittable);
1130 }
1131
1132 void visitLoadInst(LoadInst &LI) {
1134 "All simple FCA loads should have been pre-split");
1135
1136
1137
1138 if (!IsOffsetKnown)
1139 return PI.setEscapedReadOnly(&LI);
1140
1141 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1142 if (Size.isScalable()) {
1143 unsigned VScale = LI.getFunction()->getVScaleValue();
1144 if (!VScale)
1145 return PI.setAborted(&LI);
1146
1148 }
1149
1150 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1151 LI.isVolatile());
1152 }
1153
1154 void visitStoreInst(StoreInst &SI) {
1155 Value *ValOp = SI.getValueOperand();
1156 if (ValOp == *U)
1157 return PI.setEscapedAndAborted(&SI);
1158 if (!IsOffsetKnown)
1159 return PI.setAborted(&SI);
1160
1161 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1162 if (StoreSize.isScalable()) {
1163 unsigned VScale = SI.getFunction()->getVScaleValue();
1164 if (!VScale)
1165 return PI.setAborted(&SI);
1166
1168 }
1169
1170 uint64_t Size = StoreSize.getFixedValue();
1171
1172
1173
1174
1175
1176
1177
1178
1179 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1180 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1181 << Offset << " which extends past the end of the "
1182 << AllocSize << " byte alloca:\n"
1183 << " alloca: " << AS.AI << "\n"
1184 << " use: " << SI << "\n");
1185 return markAsDead(SI);
1186 }
1187
1189 "All simple FCA stores should have been pre-split");
1191 }
1192
1193 void visitMemSetInst(MemSetInst &II) {
1194 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1197 (IsOffsetKnown && Offset.uge(AllocSize)))
1198
1199 return markAsDead(II);
1200
1201 if (!IsOffsetKnown)
1202 return PI.setAborted(&II);
1203
1206 : AllocSize - Offset.getLimitedValue(),
1208 }
1209
1210 void visitMemTransferInst(MemTransferInst &II) {
1213
1214 return markAsDead(II);
1215
1216
1217
1218 if (VisitedDeadInsts.count(&II))
1219 return;
1220
1221 if (!IsOffsetKnown)
1222 return PI.setAborted(&II);
1223
1224
1225
1226
1227
1228
1229 if (Offset.uge(AllocSize)) {
1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1231 MemTransferSliceMap.find(&II);
1232 if (MTPI != MemTransferSliceMap.end())
1233 AS.Slices[MTPI->second].kill();
1234 return markAsDead(II);
1235 }
1236
1237 uint64_t RawOffset = Offset.getLimitedValue();
1238 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1239
1240
1241
1242 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1243
1244 if (!II.isVolatile())
1245 return markAsDead(II);
1246
1247 return insertUse(II, Offset, Size, false);
1248 }
1249
1250
1251
1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1254 std::tie(MTPI, Inserted) =
1255 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1256 unsigned PrevIdx = MTPI->second;
1257 if (!Inserted) {
1258 Slice &PrevP = AS.Slices[PrevIdx];
1259
1260
1261
1262 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1263 PrevP.kill();
1264 return markAsDead(II);
1265 }
1266
1267
1268
1269 PrevP.makeUnsplittable();
1270 }
1271
1272
1274
1275
1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1277 "Map index doesn't point back to a slice with this user.");
1278 }
1279
1280
1281
1282
1283 void visitIntrinsicInst(IntrinsicInst &II) {
1284 if (II.isDroppable()) {
1285 AS.DeadUseIfPromotable.push_back(U);
1286 return;
1287 }
1288
1289 if (!IsOffsetKnown)
1290 return PI.setAborted(&II);
1291
1292 if (II.isLifetimeStartOrEnd()) {
1293 insertUse(II, Offset, AllocSize, true);
1294 return;
1295 }
1296
1297 if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
1298
1299
1300
1301 AS.PFPUsers.push_back(&II);
1302 ProtectedFieldDisc = II.getArgOperand(1);
1303 for (Use &U : II.uses()) {
1306 visitLoadInst(*LI);
1308 visitStoreInst(*SI);
1309 else
1310 PI.setAborted(&II);
1311 if (PI.isAborted())
1312 break;
1313 }
1314 ProtectedFieldDisc = nullptr;
1315 return;
1316 }
1317
1318 Base::visitIntrinsicInst(II);
1319 }
1320
1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1322
1323
1324
1325
1326 SmallPtrSet<Instruction *, 4> Visited;
1328 Visited.insert(Root);
1331
1332
1334 do {
1336 std::tie(UsedI, I) = Uses.pop_back_val();
1337
1339 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1341 PI.setAborted(LI);
1342 return nullptr;
1343 }
1345 continue;
1346 }
1349 if (Op == UsedI)
1350 return SI;
1351 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1353 PI.setAborted(SI);
1354 return nullptr;
1355 }
1357 continue;
1358 }
1359
1360 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1361 if (!GEP->hasAllZeroIndices())
1362 return GEP;
1363 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1364 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1365 return I;
1366 }
1367
1368 for (User *U : I->users())
1369 if (Visited.insert(cast<Instruction>(U)).second)
1370 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1371 } while (!Uses.empty());
1372
1373 return nullptr;
1374 }
1375
1376 void visitPHINodeOrSelectInst(Instruction &I) {
1378 if (I.use_empty())
1379 return markAsDead(I);
1380
1381
1382
1383
1384 if (isa<PHINode>(I) &&
1385 I.getParent()->getFirstInsertionPt() == I.getParent()->end())
1386 return PI.setAborted(&I);
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1397 if (Result == *U)
1398
1399
1400 enqueueUsers(I);
1401 else
1402
1403
1404 AS.DeadOperands.push_back(U);
1405
1406 return;
1407 }
1408
1409 if (!IsOffsetKnown)
1410 return PI.setAborted(&I);
1411
1412
1413 uint64_t &Size = PHIOrSelectSizes[&I];
1414 if (!Size) {
1415
1416 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1417 return PI.setAborted(UnsafeI);
1418 }
1419
1420
1421
1422
1423
1424
1425
1426 if (Offset.uge(AllocSize)) {
1427 AS.DeadOperands.push_back(U);
1428 return;
1429 }
1430
1431 insertUse(I, Offset, Size);
1432 }
1433
1434 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1435
1436 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1437
1438
1439 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1440
1441 void visitCallBase(CallBase &CB) {
1442
1443
1447 PI.setEscapedReadOnly(&CB);
1448 return;
1449 }
1450
1451 Base::visitCallBase(CB);
1452 }
1453};
1454
1455AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1456 :
1457#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1458 AI(AI),
1459#endif
1460 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1461 SliceBuilder PB(DL, AI, *this);
1462 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1463 if (PtrI.isEscaped() || PtrI.isAborted()) {
1464
1465
1466 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1467 : PtrI.getAbortingInst();
1468 assert(PointerEscapingInstr && "Did not track a bad instruction");
1469 return;
1470 }
1471 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1472
1473 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1474
1475
1476
1477 llvm::stable_sort(Slices);
1478}
1479
1480#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1481
1482void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1483 StringRef Indent) const {
1484 printSlice(OS, I, Indent);
1485 OS << "\n";
1486 printUse(OS, I, Indent);
1487}
1488
1489void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1490 StringRef Indent) const {
1491 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1492 << " slice #" << (I - begin())
1493 << (I->isSplittable() ? " (splittable)" : "");
1494}
1495
1496void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1497 StringRef Indent) const {
1498 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1499}
1500
1501void AllocaSlices::print(raw_ostream &OS) const {
1502 if (PointerEscapingInstr) {
1503 OS << "Can't analyze slices for alloca: " << AI << "\n"
1504 << " A pointer to this alloca escaped by:\n"
1505 << " " << *PointerEscapingInstr << "\n";
1506 return;
1507 }
1508
1509 if (PointerEscapingInstrReadOnly)
1510 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1511
1512 OS << "Slices of alloca: " << AI << "\n";
1513 for (const_iterator I = begin(), E = end(); I != E; ++I)
1514 print(OS, I);
1515}
1516
1517LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1518 print(dbgs(), I);
1519}
1520LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1521
1522#endif
1523
1524
1525
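// Walk the slices of a partition and return (a) a type common to all loads
// and stores that span the whole partition, if one exists, and (b) the
// widest integer user type, as a fallback candidate for integer widening.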
1526static std::pair<Type *, IntegerType *>
1527findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1528 uint64_t EndOffset) {
1529 Type *Ty = nullptr;
1530 bool TyIsCommon = true;
1531 IntegerType *ITy = nullptr;
1532
1533
1534
1535 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1538 continue;
1539 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1540 continue;
1541
1542 Type *UserTy = nullptr;
1546 UserTy = SI->getValueOperand()->getType();
1547 }
1548
1550
1551
1552
1553
1554 if (UserITy->getBitWidth() % 8 != 0 ||
1555 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1556 continue;
1557
1558
1559
1560 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1561 ITy = UserITy;
1562 }
1563
1564
1565
1566 if (!UserTy || (Ty && Ty != UserTy))
1567 TyIsCommon = false;
1568 else
1569 Ty = UserTy;
1570 }
1571
1572 return {TyIsCommon ? Ty : nullptr, ITy};
1573}
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1595
1596
1597
1598
1599
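// The PHI speculation safety check below requires every user of the PHI to
// be a simple load of one common type, with nothing that may write memory
// between the PHI and those loads, and each incoming pointer must be safe
// to load from at its predecessor's terminator.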
1603 Type *LoadType = nullptr;
1607 return false;
1608
1609
1610
1611
1613 return false;
1614
1615 if (LoadType) {
1616 if (LoadType != LI->getType())
1617 return false;
1618 } else {
1619 LoadType = LI->getType();
1620 }
1621
1622
1623
1625 if (BBI->mayWriteToMemory())
1626 return false;
1627
1628 MaxAlign = std::max(MaxAlign, LI->getAlign());
1629 }
1630
1631 if (!LoadType)
1632 return false;
1633
1634 APInt LoadSize =
1635 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1636
1637
1638
1639
1640 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1643
1644
1645
1646
1648 return false;
1649
1650
1651
1653 continue;
1654
1655
1656
1657
1659 continue;
1660
1661 return false;
1662 }
1663
1664 return true;
1665}
1666
1669
1672 IRB.SetInsertPoint(&PN);
1674 PN.getName() + ".sroa.speculated");
1675
1676
1677
1680
1681
1686 }
1687
1688
1690 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1693
1694
1695
1696
1697
1698 if (Value *V = InjectedLoads.lookup(Pred)) {
1700 continue;
1701 }
1702
1703 Instruction *TI = Pred->getTerminator();
1704 IRB.SetInsertPoint(TI);
1705
1706 LoadInst *Load = IRB.CreateAlignedLoad(
1707 LoadTy, InVal, Alignment,
1708 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1709 ++NumLoadsSpeculated;
1710 if (AATags)
1711 Load->setAAMetadata(AATags);
1713 InjectedLoads[Pred] = Load;
1714 }
1715
1716 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1718}
1719
1720SelectHandSpeculativity &
1721SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1722 if (isTrueVal)
1724 else
1726 return *this;
1727}
1728
1729bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1732}
1733
1734bool SelectHandSpeculativity::areAllSpeculatable() const {
1735 return isSpeculatable(true) &&
1736 isSpeculatable(false);
1737}
1738
1739bool SelectHandSpeculativity::areAnySpeculatable() const {
1740 return isSpeculatable(true) ||
1741 isSpeculatable(false);
1742}
1743bool SelectHandSpeculativity::areNoneSpeculatable() const {
1744 return !areAnySpeculatable();
1745}
1746
1747static SelectHandSpeculativity
1750 SelectHandSpeculativity Spec;
1751
1753 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1755 &LI))
1756 Spec.setAsSpeculatable(Value == SI.getTrueValue());
1758 return Spec;
1759
1760 return Spec;
1761}
1762
1763std::optional<RewriteableMemOps>
1764SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1765 RewriteableMemOps Ops;
1766
1767 for (User *U : SI.users()) {
1770
1772
1773
1774
1776 return {};
1777 Ops.emplace_back(Store);
1778 continue;
1779 }
1780
1782
1783
1784
1786 return {};
1787
1788 PossiblySpeculatableLoad Load(LI);
1790
1791
1793 return {};
1794 Ops.emplace_back(Load);
1795 continue;
1796 }
1797
1798 SelectHandSpeculativity Spec =
1800 if (PreserveCFG && !Spec.areAllSpeculatable())
1801 return {};
1802
1803 Load.setInt(Spec);
1804 Ops.emplace_back(Load);
1805 }
1806
1807 return Ops;
1808}
1809
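// Speculate a load over a select of pointers by loading from both arms and
// then selecting between the two loaded values with the original condition.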
1810static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1811 IRBuilderTy &IRB) {
1813
1814 Value *TV = SI.getTrueValue();
1815 Value *FV = SI.getFalseValue();
1816
1817
1818 assert(LI.isSimple() && "We only speculate simple loads");
1819
1820 IRB.SetInsertPoint(&LI);
1821
1824 LI.getName() + ".sroa.speculate.load.true");
1827 LI.getName() + ".sroa.speculate.load.false");
1828 NumLoadsSpeculated += 2;
1829
1830
1833
1835 if (Tags) {
1838 }
1839
1840 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1841 LI.getName() + ".sroa.speculated",
1843
1844 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1846}
1847
1848template <typename T>
1850 SelectHandSpeculativity Spec,
1853 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1857 if (Spec.areNoneSpeculatable())
1859 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1860 else {
1862 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1863 nullptr, nullptr);
1864 if (Spec.isSpeculatable(true))
1866 }
1868 Spec = {};
1870 Tail->setName(Head->getName() + ".cont");
1875 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1876 int SuccIdx = IsThen ? 0 : 1;
1877 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1878 auto &CondMemOp = cast<T>(*I.clone());
1879 if (NewMemOpBB != Head) {
1880 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1882 ++NumLoadsPredicated;
1883 else
1884 ++NumStoresPredicated;
1885 } else {
1886 CondMemOp.dropUBImplyingAttrsAndMetadata();
1887 ++NumLoadsSpeculated;
1888 }
1889 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1890 Value *Ptr = SI.getOperand(1 + SuccIdx);
1891 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1893 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1894 PN->addIncoming(&CondMemOp, NewMemOpBB);
1895 } else
1897 }
1901 I.replaceAllUsesWith(PN);
1902 }
1903}
1904
1906 SelectHandSpeculativity Spec,
1912 else
1914}
1915
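// Rewrite every load/store that uses a select of pointers: operations whose
// both arms are speculatable are folded into a select of values, the rest
// are turned into predicated loads/stores by splitting the CFG.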
1916static bool rewriteSelectInstMemOps(SelectInst &SI,
1917 const RewriteableMemOps &Ops,
1918 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1919 bool CFGChanged = false;
1921
1922 for (const RewriteableMemOp &Op : Ops) {
1923 SelectHandSpeculativity Spec;
1925 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1926 I = *US;
1927 } else {
1928 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1929 I = PSL.getPointer();
1930 Spec = PSL.getInt();
1931 }
1932 if (Spec.areAllSpeculatable()) {
1934 } else {
1935 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1937 CFGChanged = true;
1938 }
1939 I->eraseFromParent();
1940 }
1941
1944 SI.eraseFromParent();
1945 return CFGChanged;
1946}
1947
1948
1949
1950static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
1951 Value *Ptr, APInt Offset, Type *PointerTy,
1952 const Twine &NamePrefix) {
1953 if (Offset != 0)
1954 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1955 NamePrefix + "sroa_idx");
1956 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1957 NamePrefix + "sroa_cast");
1958}
1959
1960
1964
1965
1966
1967
1968
1969
1970
1971static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1972 unsigned VScale = 0) {
1973 if (OldTy == NewTy)
1974 return true;
1975
1976
1977
1978
1982 "We can't have the same bitwidth for different int types");
1983 return false;
1984 }
1985
1986 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1987 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1988
1991
1992 if (!VScale)
1993 return false;
1994
1995
1996
1997
1998 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1999 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
2000
2003 return false;
2004
2006 } else {
2008 return false;
2009
2011 }
2012 }
2013
2014 if (NewSize != OldSize)
2015 return false;
2017 return false;
2018
2019
2020
2027
2028
2029
2030 return OldAS == NewAS ||
2031 (!DL.isNonIntegralAddressSpace(OldAS) &&
2032 !DL.isNonIntegralAddressSpace(NewAS) &&
2033 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2034 }
2035
2036
2037
2039 return !DL.isNonIntegralPointerType(NewTy);
2040
2041
2042
2043 if (!DL.isNonIntegralPointerType(OldTy))
2044 return true;
2045
2046 return false;
2047 }
2048
2050 return false;
2051
2052 return true;
2053}
2054
2055
2056
2057
2058
2059
2060
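// Convert a value between two types that canConvertValue accepted: plain
// bitcasts plus inttoptr/ptrtoint for pointer cases, and insert/extract
// vector operations when converting to or from scalable vector types.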
2061static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2062 Type *NewTy) {
2063 Type *OldTy = V->getType();
2064
2065#ifndef NDEBUG
2066 BasicBlock *BB = IRB.GetInsertBlock();
2070 "Value not convertable to type");
2071#endif
2072
2073 if (OldTy == NewTy)
2074 return V;
2075
2077 "Integer types must be the exact same to convert.");
2078
2079
2080
2081 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2082 Type *InTy = In->getType();
2083 if (InTy == Ty)
2084 return In;
2085
2087
2088
2090 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2092 IRB.getInt64(0)),
2093 Ty);
2094 }
2095
2097
2098
2100 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2101 IRB.getInt64(0));
2102 }
2103
2104 return IRB.CreateBitCast(In, Ty);
2105 };
2106
2107
2109
2110
2111
2112
2113 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2114 NewTy);
2115 }
2116
2117
2119
2120
2121
2122
2123 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2124 NewTy);
2125 }
2126
2130
2131
2132
2133
2134
2135
2136 if (OldAS != NewAS) {
2137 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2138 return IRB.CreateIntToPtr(
2139 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2140 DL.getIntPtrType(NewTy)),
2141 NewTy);
2142 }
2143 }
2144
2145 return CreateBitCastLike(V, NewTy);
2146}
2147
2148
2149
2150
2151
2152static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2153 VectorType *Ty,
2154 uint64_t ElementSize,
2155 const DataLayout &DL,
2156 unsigned VScale) {
2157
2158 uint64_t BeginOffset =
2159 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2160 uint64_t BeginIndex = BeginOffset / ElementSize;
2161 if (BeginIndex * ElementSize != BeginOffset ||
2163 return false;
2164 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2165 uint64_t EndIndex = EndOffset / ElementSize;
2166 if (EndIndex * ElementSize != EndOffset ||
2168 return false;
2169
2170 assert(EndIndex > BeginIndex && "Empty vector!");
2171 uint64_t NumElements = EndIndex - BeginIndex;
2172 Type *SliceTy = (NumElements == 1)
2173 ? Ty->getElementType()
2175
2176 Type *SplitIntTy =
2177 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2178
2179 Use *U = S.getUse();
2180
2182 if (MI->isVolatile())
2183 return false;
2184 if (!S.isSplittable())
2185 return false;
2186 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2187 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2188 return false;
2189 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2190 if (LI->isVolatile())
2191 return false;
2192 Type *LTy = LI->getType();
2193
2194 if (LTy->isStructTy())
2195 return false;
2196 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2197 assert(LTy->isIntegerTy());
2198 LTy = SplitIntTy;
2199 }
2201 return false;
2202 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2203 if (SI->isVolatile())
2204 return false;
2205 Type *STy = SI->getValueOperand()->getType();
2206
2208 return false;
2209 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2211 STy = SplitIntTy;
2212 }
2214 return false;
2215 } else {
2216 return false;
2217 }
2218
2219 return true;
2220}
2221
2222
2223
2224
2225
2229 bool HaveCommonEltTy, Type *CommonEltTy,
2230 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2231 VectorType *CommonVecPtrTy, unsigned VScale) {
2232
2233 if (CandidateTys.empty())
2234 return nullptr;
2235
2236
2237
2238
2239
2240 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2241 return nullptr;
2242
2243
2244 if (!HaveCommonEltTy && HaveVecPtrTy) {
2245
2246 CandidateTys.clear();
2247 CandidateTys.push_back(CommonVecPtrTy);
2248 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2249
2250 for (VectorType *&VTy : CandidateTys) {
2251 if (!VTy->getElementType()->isIntegerTy())
2253 VTy->getContext(), VTy->getScalarSizeInBits())));
2254 }
2255
2256
2257
2259 (void)DL;
2260 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2261 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2262 "Cannot have vector types of different sizes!");
2263 assert(RHSTy->getElementType()->isIntegerTy() &&
2264 "All non-integer types eliminated!");
2265 assert(LHSTy->getElementType()->isIntegerTy() &&
2266 "All non-integer types eliminated!");
2269 };
2271 (void)DL;
2272 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2273 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2274 "Cannot have vector types of different sizes!");
2275 assert(RHSTy->getElementType()->isIntegerTy() &&
2276 "All non-integer types eliminated!");
2277 assert(LHSTy->getElementType()->isIntegerTy() &&
2278 "All non-integer types eliminated!");
2281 };
2282 llvm::sort(CandidateTys, RankVectorTypesComp);
2283 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2284 CandidateTys.end());
2285 } else {
2286
2287
2288#ifndef NDEBUG
2289 for (VectorType *VTy : CandidateTys) {
2290 assert(VTy->getElementType() == CommonEltTy &&
2291 "Unaccounted for element type!");
2292 assert(VTy == CandidateTys[0] &&
2293 "Different vector types with the same element type!");
2294 }
2295#endif
2296 CandidateTys.resize(1);
2297 }
2298
2299
2300
2303 std::numeric_limits::max();
2304 });
2305
2306
2309 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2310
2311
2312
2313 if (ElementSize % 8)
2314 return false;
2315 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2316 "vector size not a multiple of element size?");
2317 ElementSize /= 8;
2318
2319 for (const Slice &S : P)
2321 return false;
2322
2323 for (const Slice *S : P.splitSliceTails())
2325 return false;
2326
2327 return true;
2328 });
2329 return VTy != CandidateTys.end() ? *VTy : nullptr;
2330}
2331
2336 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2337 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2338 [[maybe_unused]] VectorType *OriginalElt =
2339 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2340
2341
2342 for (Type *Ty : OtherTys) {
2344 continue;
2345 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2346
2347
2348 for (VectorType *const VTy : CandidateTysCopy) {
2349
2350 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2351 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2352 unsigned ElementSize =
2353 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2355 VectorSize % TypeSize == 0) {
2357 CheckCandidateType(NewVTy);
2358 }
2359 }
2360 }
2361
2363 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2364 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2365}
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
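// Determine whether a partition should be rewritten as a single vector:
// collect candidate vector types from the loads and stores that cover the
// whole partition, then pick one that every slice can legally use.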
2376static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2377 unsigned VScale) {
2378
2379
2383 Type *CommonEltTy = nullptr;
2384 VectorType *CommonVecPtrTy = nullptr;
2385 bool HaveVecPtrTy = false;
2386 bool HaveCommonEltTy = true;
2387 bool HaveCommonVecPtrTy = true;
2388 auto CheckCandidateType = [&](Type *Ty) {
2390
2391 if (!CandidateTys.empty()) {
2393 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2394 DL.getTypeSizeInBits(V).getFixedValue()) {
2395 CandidateTys.clear();
2396 return;
2397 }
2398 }
2400 Type *EltTy = VTy->getElementType();
2401
2402 if (!CommonEltTy)
2403 CommonEltTy = EltTy;
2404 else if (CommonEltTy != EltTy)
2405 HaveCommonEltTy = false;
2406
2408 HaveVecPtrTy = true;
2409 if (!CommonVecPtrTy)
2410 CommonVecPtrTy = VTy;
2411 else if (CommonVecPtrTy != VTy)
2412 HaveCommonVecPtrTy = false;
2413 }
2414 }
2415 };
2416
2417
2418 for (const Slice &S : P) {
2423 Ty = SI->getValueOperand()->getType();
2424 else
2425 continue;
2426
2427 auto CandTy = Ty->getScalarType();
2428 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2429 S.endOffset() != P.endOffset())) {
2430 DeferredTys.insert(Ty);
2431 continue;
2432 }
2433
2434 LoadStoreTys.insert(Ty);
2435
2436 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2437 CheckCandidateType(Ty);
2438 }
2439
2442 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2443 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2444 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2445 return VTy;
2446
2447 CandidateTys.clear();
2449 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2450 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2451 CommonVecPtrTy, VScale);
2452}
2453
2454
2455
2456
2457
2458static bool isIntegerWideningViableForSlice(const Slice &S,
2459 uint64_t AllocBeginOffset,
2460 Type *AllocaTy,
2461 const DataLayout &DL,
2462 bool &WholeAllocaOp) {
2463 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2464
2465 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2466 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2467
2468 Use *U = S.getUse();
2469
2470
2471
2472
2473
2475 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2476 return true;
2477 }
2478
2479
2480
2481 if (RelEnd > Size)
2482 return false;
2483
2484 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2485 if (LI->isVolatile())
2486 return false;
2487
2490 return false;
2491
2492
2493 if (S.beginOffset() < AllocBeginOffset)
2494 return false;
2495
2496
2497
2499 WholeAllocaOp = true;
2500 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2501 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2502 return false;
2503 } else if (RelBegin != 0 || RelEnd != Size ||
2505
2506
2507 return false;
2508 }
2510 Type *ValueTy = SI->getValueOperand()->getType();
2511 if (SI->isVolatile())
2512 return false;
2513
2514 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2516 return false;
2517
2518
2519 if (S.beginOffset() < AllocBeginOffset)
2520 return false;
2521
2522
2523
2525 WholeAllocaOp = true;
2526 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2527 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2528 return false;
2529 } else if (RelBegin != 0 || RelEnd != Size ||
2531
2532
2533 return false;
2534 }
2537 return false;
2538 if (!S.isSplittable())
2539 return false;
2540 } else {
2541 return false;
2542 }
2543
2544 return true;
2545}
2546
2547
2548
2549
2550
2551
2552
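// Integer widening: test whether every slice of the partition can be
// expressed as loads/stores of one wide integer covering the entire alloca,
// so the alloca can be promoted as a single integer value.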
2553static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2554 const DataLayout &DL) {
2555 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2556
2558 return false;
2559
2560
2561 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2562 return false;
2563
2564
2565
2566
2570 return false;
2571
2572
2573
2574
2575
2576
2577
2578
2579 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2580
2581 for (const Slice &S : P)
2583 WholeAllocaOp))
2584 return false;
2585
2586 for (const Slice *S : P.splitSliceTails())
2588 WholeAllocaOp))
2589 return false;
2590
2591 return WholeAllocaOp;
2592}
2593
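// extractInteger/insertInteger below implement byte-level slicing of a wide
// integer: shifts account for endianness, then trunc/zext plus masking move
// the narrow slice out of or into the wide value.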
2594static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2595 IntegerType *Ty, uint64_t Offset,
2596 const Twine &Name) {
2598 IntegerType *IntTy = cast<IntegerType>(V->getType());
2599 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2600 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2601 "Element extends past full value");
2603 if (DL.isBigEndian())
2604 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2605 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2606 if (ShAmt) {
2607 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2609 }
2610 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2611 "Cannot extract to a larger integer!");
2612 if (Ty != IntTy) {
2613 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2615 }
2616 return V;
2617}
2618
2619static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2620 Value *V, uint64_t Offset, const Twine &Name) {
2621 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2622 IntegerType *Ty = cast<IntegerType>(V->getType());
2623 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2624 "Cannot insert a larger integer!");
2626 if (Ty != IntTy) {
2627 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2629 }
2630 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2631 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2632 "Element store outside of alloca store");
2634 if (DL.isBigEndian())
2635 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2636 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2637 if (ShAmt) {
2638 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2640 }
2641
2642 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2643 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2644 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2646 V = IRB.CreateOr(Old, V, Name + ".insert");
2648 }
2649 return V;
2650}
2651
2652static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2653 unsigned EndIndex, const Twine &Name) {
2654 auto *VecTy = cast<FixedVectorType>(V->getType());
2655 unsigned NumElements = EndIndex - BeginIndex;
2657
2658 if (NumElements == VecTy->getNumElements())
2659 return V;
2660
2661 if (NumElements == 1) {
2662 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2663 Name + ".extract");
2665 return V;
2666 }
2667
2669 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2671 return V;
2672}
2673
2674static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2675 unsigned BeginIndex, const Twine &Name) {
2677 assert(VecTy && "Can only insert a vector into a vector");
2678
2680 if (!Ty) {
2681
2682 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2683 Name + ".insert");
2685 return V;
2686 }
2687
2690 "Too many elements!");
2693 assert(V->getType() == VecTy && "Vector type mismatch");
2694 return V;
2695 }
2697
2698
2699
2700
2701
2705 if (i >= BeginIndex && i < EndIndex)
2706 Mask.push_back(i - BeginIndex);
2707 else
2708 Mask.push_back(-1);
2709 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2711
2715 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2716
2717
2718 V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
2720
2722 return V;
2723}
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
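// Helper used by the tree-structured merge rewrite below: concatenates two
// stored vectors with a shufflevector, first bitcasting and widening the
// narrower operand when the element types or element counts differ.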
2747
2750
2751
2752
2755
2756
2757
2759 const char *DebugName) {
2760 Type *EltType = VecType->getElementType();
2761 if (EltType != NewAIEltTy) {
2762
2763 unsigned TotalBits =
2764 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2765 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2766
2768 V = Builder.CreateBitCast(V, NewVecType);
2769 VecType = NewVecType;
2770 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2771 }
2772 };
2773
2774 BitcastIfNeeded(V0, VecType0, "V0");
2775 BitcastIfNeeded(V1, VecType1, "V1");
2776
2777 unsigned NumElts0 = VecType0->getNumElements();
2778 unsigned NumElts1 = VecType1->getNumElements();
2779
2781
2782 if (NumElts0 == NumElts1) {
2783 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2784 ShuffleMask.push_back(i);
2785 } else {
2786
2787
2788 unsigned SmallSize = std::min(NumElts0, NumElts1);
2789 unsigned LargeSize = std::max(NumElts0, NumElts1);
2790 bool IsV0Smaller = NumElts0 < NumElts1;
2791 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2793 for (unsigned i = 0; i < SmallSize; ++i)
2795 for (unsigned i = SmallSize; i < LargeSize; ++i)
2797 ExtendedVec = Builder.CreateShuffleVector(
2799 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2800 for (unsigned i = 0; i < NumElts0; ++i)
2801 ShuffleMask.push_back(i);
2802 for (unsigned i = 0; i < NumElts1; ++i)
2803 ShuffleMask.push_back(LargeSize + i);
2804 }
2805
2806 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2807}
2808
2809namespace {
2810
2811
2812
2813
2814
2815
2816
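// Visitor that rewrites each use in one partition of an alloca to target
// the new, smaller alloca, returning true when the rewritten use leaves the
// new alloca promotable to an SSA value.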
2817class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2818
2819 friend class InstVisitor<AllocaSliceRewriter, bool>;
2820
2821 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2822
2823 const DataLayout &DL;
2824 AllocaSlices &AS;
2826 AllocaInst &OldAI, &NewAI;
2827 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2828 Type *NewAllocaTy;
2829
2830
2831
2832
2833
2834 IntegerType *IntTy;
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2846 Type *ElementTy;
2847 uint64_t ElementSize;
2848
2849
2850
2851 uint64_t BeginOffset = 0;
2852 uint64_t EndOffset = 0;
2853
2854
2855
2856 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2857
2858 uint64_t SliceSize = 0;
2859 bool IsSplittable = false;
2860 bool IsSplit = false;
2861 Use *OldUse = nullptr;
2863
2864
2865 SmallSetVector<PHINode *, 8> &PHIUsers;
2866 SmallSetVector<SelectInst *, 8> &SelectUsers;
2867
2868
2869
2870 IRBuilderTy IRB;
2871
2872
2873
2874 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2876 return &NewAI;
2877
2878 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2879 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2880 }
2881
2882public:
2883 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2884 AllocaInst &OldAI, AllocaInst &NewAI,
2885 uint64_t NewAllocaBeginOffset,
2886 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2887 VectorType *PromotableVecTy,
2888 SmallSetVector<PHINode *, 8> &PHIUsers,
2889 SmallSetVector<SelectInst *, 8> &SelectUsers)
2890 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2891 NewAllocaBeginOffset(NewAllocaBeginOffset),
2892 NewAllocaEndOffset(NewAllocaEndOffset),
2893 NewAllocaTy(NewAI.getAllocatedType()),
2894 IntTy(
2895 IsIntegerPromotable
2897 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2898 .getFixedValue())
2899 : nullptr),
2900 VecTy(PromotableVecTy),
2901 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2902 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2903 : 0),
2904 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2905 IRB(NewAI.getContext(), ConstantFolder()) {
2906 if (VecTy) {
2907 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2908 "Only multiple-of-8 sized vector elements are viable");
2909 ++NumVectorized;
2910 }
2911 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2912 }
2913
2914 bool visit(AllocaSlices::const_iterator I) {
2915 bool CanSROA = true;
2916 BeginOffset = I->beginOffset();
2917 EndOffset = I->endOffset();
2918 IsSplittable = I->isSplittable();
2919 IsSplit =
2920 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2921 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2924
2925
2926 assert(BeginOffset < NewAllocaEndOffset);
2927 assert(EndOffset > NewAllocaBeginOffset);
2928 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2929 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2930
2931 SliceSize = NewEndOffset - NewBeginOffset;
2932 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2933 << ") NewBegin:(" << NewBeginOffset << ", "
2934 << NewEndOffset << ") NewAllocaBegin:("
2935 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2936 << ")\n");
2937 assert(IsSplit || NewBeginOffset == BeginOffset);
2938 OldUse = I->getUse();
2940
2942 IRB.SetInsertPoint(OldUserI);
2943 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2944 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2945 Twine(BeginOffset) + ".");
2946
2948 if (VecTy || IntTy)
2950 return CanSROA;
2951 }
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
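// Attempt the "tree structured merge" rewrite: when a partition consists of
// several non-overlapping vector stores that together cover the whole new
// alloca plus a single full-width load, combine the stored values with
// shufflevectors, store the merged vector once, and replace the load,
// returning the instructions that became dead.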
2991 std::optional<SmallVector<Value *, 4>>
2992 rewriteTreeStructuredMerge(Partition &P) {
2993
2994 if (P.splitSliceTails().size() > 0)
2995 return std::nullopt;
2996
2998 LoadInst *TheLoad = nullptr;
2999
3000
3001 struct StoreInfo {
3002 StoreInst *Store;
3003 uint64_t BeginOffset;
3004 uint64_t EndOffset;
3005 Value *StoredValue;
3006 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
3007 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
3008 };
3009
3011
3012
3013
3014 Type *AllocatedEltTy =
3017 : Type::getInt8Ty(NewAI.getContext());
3018 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
3019
3020
3021
3022
3023
3024
3025 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
3027 return FixedVecTy &&
3028 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
3029 !FixedVecTy->getElementType()->isPointerTy();
3030 };
3031
3032 for (Slice &S : P) {
3035
3036
3037
3038
3039
3040 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
3041 S.beginOffset() != NewAllocaBeginOffset ||
3042 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
3043 return std::nullopt;
3044 TheLoad = LI;
3046
3047
3048
3049
3050
3051 if (!IsTypeValidForTreeStructuredMerge(
3052 SI->getValueOperand()->getType()) ||
3053 SI->isVolatile())
3054 return std::nullopt;
3056 unsigned NumElts = VecTy->getNumElements();
3057 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
3058 if (NumElts * EltSize % AllocatedEltTySize != 0)
3059 return std::nullopt;
3060 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
3061 SI->getValueOperand());
3062 } else {
3063
3064
3065 return std::nullopt;
3066 }
3067 }
3068
3069 if (!TheLoad)
3070 return std::nullopt;
3071
3072
3073 if (StoreInfos.size() < 2)
3074 return std::nullopt;
3075
3076
3077
3078 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
3079 return A.BeginOffset < B.BeginOffset;
3080 });
3081
3082
3083 uint64_t ExpectedStart = NewAllocaBeginOffset;
3084 for (auto &StoreInfo : StoreInfos) {
3085 uint64_t BeginOff = StoreInfo.BeginOffset;
3086 uint64_t EndOff = StoreInfo.EndOffset;
3087
3088
3089 if (BeginOff != ExpectedStart)
3090 return std::nullopt;
3091
3092 ExpectedStart = EndOff;
3093 }
3094
3095 if (ExpectedStart != NewAllocaEndOffset)
3096 return std::nullopt;
3097
3098
3099
3100
3101
3102
3103
3104
3105
3107 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3108
3109 for (auto &StoreInfo : StoreInfos) {
3110 if (StoreInfo.Store->getParent() != StoreBB)
3111 return std::nullopt;
3112 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3113 return std::nullopt;
3114 }
3115
3116
3117
3119 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
3120 << "\n Ordered stores:\n";
3122 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
3123 << Info.EndOffset << ") \tStore: " << *Info.Store
3124 << "\tValue: " << *Info.StoredValue << "\n";
3125 });
3126
3127
3128
3129 std::queue<Value *> VecElements;
3130 IRBuilder<> Builder(StoreInfos.back().Store);
3131 for (const auto &Info : StoreInfos) {
3133 VecElements.push(Info.StoredValue);
3134 }
3135
3136 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
3137 while (VecElements.size() > 1) {
3138 const auto NumElts = VecElements.size();
3139 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3140 Value *V0 = VecElements.front();
3141 VecElements.pop();
3142 Value *V1 = VecElements.front();
3143 VecElements.pop();
3145 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3146 VecElements.push(Merged);
3147 }
3148 if (NumElts % 2 == 1) {
3149 Value *V = VecElements.front();
3150 VecElements.pop();
3151 VecElements.push(V);
3152 }
3153 }
3154
3155
3156 Value *MergedValue = VecElements.front();
3157 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3158
3160 TheLoad->replaceAllUsesWith(Builder.CreateAlignedLoad(
3161 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3162 TheLoad->getName() + ".sroa.new.load"));
3163 DeletedValues.push_back(TheLoad);
3164
3165 return DeletedValues;
3166 }
3167
3168private:
3169
3170 using Base::visit;
3171
3172
3173 bool visitInstruction(Instruction &I) {
3174 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3175 llvm_unreachable("No rewrite rule for this instruction!");
3176 }
3177
3178 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3179
3180
3181 assert(IsSplit || BeginOffset == NewBeginOffset);
3182 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3183
3184 StringRef OldName = OldPtr->getName();
3185
3186 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3187 if (LastSROAPrefix != StringRef::npos) {
3188 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3189
3190 size_t IndexEnd = OldName.find_first_not_of("0123456789");
3191 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3192
3193 OldName = OldName.substr(IndexEnd + 1);
3194 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3195 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3196
3197 OldName = OldName.substr(OffsetEnd + 1);
3198 }
3199 }
3200
3201 OldName = OldName.substr(0, OldName.find(".sroa_"));
3202
3205 PointerTy, Twine(OldName) + ".");
3206 }
3207
3208
3209
3210
3211
3212
3213 Align getSliceAlign() {
3215 NewBeginOffset - NewAllocaBeginOffset);
3216 }
3217
3218 unsigned getIndex(uint64_t Offset) {
3219 assert(VecTy && "Can only call getIndex when rewriting a vector");
3220 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3221 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3222 uint32_t Index = RelOffset / ElementSize;
3223 assert(Index * ElementSize == RelOffset);
3225 }
3226
3227 void deleteIfTriviallyDead(Value *V) {
3230 Pass.DeadInsts.push_back(I);
3231 }
3232
3233 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3234 unsigned BeginIndex = getIndex(NewBeginOffset);
3235 unsigned EndIndex = getIndex(NewEndOffset);
3236 assert(EndIndex > BeginIndex && "Empty vector!");
3237
3240
3241 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3242 LLVMContext::MD_access_group});
3243 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3244 }
3245
3246 Value *rewriteIntegerLoad(LoadInst &LI) {
3247 assert(IntTy && "We cannot insert an integer to the alloca");
3252 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3253 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3254 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3255 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3257 }
3258
3259
3260
3261
3262
3264 "Can only handle an extract for an overly wide load");
3266 V = IRB.CreateZExt(V, LI.getType());
3267 return V;
3268 }
3269
3270 bool visitLoadInst(LoadInst &LI) {
3273 assert(OldOp == OldPtr);
3274
3276
3278
3279 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3281 bool IsPtrAdjusted = false;
3283 if (VecTy) {
3284 V = rewriteVectorizedLoadInst(LI);
3286 V = rewriteIntegerLoad(LI);
3287 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3288 NewEndOffset == NewAllocaEndOffset &&
3291 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3293 Value *NewPtr =
3294 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3295 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
3296 NewAI.getAlign(), LI.isVolatile(),
3297 LI.getName());
3298 if (LI.isVolatile())
3299 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3300 if (NewLI->isAtomic())
3301 NewLI->setAlignment(LI.getAlign());
3302
3303
3304
3305
3306 copyMetadataForLoad(*NewLI, LI);
3307
3308
3309 if (AATags)
3310 NewLI->setAAMetadata(AATags.adjustForAccess(
3311 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3312
3313
3314 V = NewLI;
3315
3316
3317
3318
3319 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3320 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3321 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3322 V = IRB.CreateZExt(V, TITy, "load.ext");
3323 if (DL.isBigEndian())
3324 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3325 "endian_shift");
3326 }
3327 } else {
3328 Type *LTy = IRB.getPtrTy(AS);
3329 LoadInst *NewLI =
3330 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3332
3333 if (AATags)
3335 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3336
3339 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3340 LLVMContext::MD_access_group});
3341
3342 V = NewLI;
3343 IsPtrAdjusted = true;
3344 }
3346
3347 if (IsSplit) {
3350 "Only integer type loads and stores are split");
3351 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3352 "Split load isn't smaller than original load");
3354 "Non-byte-multiple bit width");
3355
3357
3358
3359
3360 LIIt.setHeadBit(true);
3361 IRB.SetInsertPoint(LI.getParent(), LIIt);
3362
3363
3364
3365
3366 Value *Placeholder =
3368 false, Align(1));
3369 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3370 "insert");
3372 Placeholder->replaceAllUsesWith(&LI);
3373 Placeholder->deleteValue();
3374 } else {
3376 }
3377
3378 Pass.DeadInsts.push_back(&LI);
3379 deleteIfTriviallyDead(OldOp);
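// The rewritten load only stays a promotion candidate when it is not volatile
// and we did not have to fall back to an adjusted slice pointer.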
3381 return !LI.isVolatile() && !IsPtrAdjusted;
3382 }
3383
3384 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3385 AAMDNodes AATags) {
3386
3387
3389 if (V->getType() != VecTy) {
3390 unsigned BeginIndex = getIndex(NewBeginOffset);
3391 unsigned EndIndex = getIndex(NewEndOffset);
3392 assert(EndIndex > BeginIndex && "Empty vector!");
3393 unsigned NumElements = EndIndex - BeginIndex;
3395 "Too many elements!");
3396 Type *SliceTy = (NumElements == 1)
3397 ? ElementTy
3398 : FixedVectorType::get(ElementTy, NumElements);
3399 if (V->getType() != SliceTy)
3401
3402
3405 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3406 }
3407 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3408 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3409 LLVMContext::MD_access_group});
3410 if (AATags)
3413 Pass.DeadInsts.push_back(&SI);
3414
3415
3416 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3417 Store, Store->getPointerOperand(), OrigV, DL);
3419 return true;
3420 }
3421
3422 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3423 assert(IntTy && "We cannot extract an integer from the alloca");
3425 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3428 NewAI.getAlign(), "oldload");
3430 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3431 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3433 }
3435 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3436 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3437 LLVMContext::MD_access_group});
3438 if (AATags)
3441
3442 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3443 Store, Store->getPointerOperand(),
3444 Store->getValueOperand(), DL);
3445
3446 Pass.DeadInsts.push_back(&SI);
3448 return true;
3449 }
3450
3451 bool visitStoreInst(StoreInst &SI) {
3453 Value *OldOp = SI.getOperand(1);
3454 assert(OldOp == OldPtr);
3455
3456 AAMDNodes AATags = SI.getAAMetadata();
3457 Value *V = SI.getValueOperand();
3458
3459
3460
3461 if (V->getType()->isPointerTy())
3463 Pass.PostPromotionWorklist.insert(AI);
3464
3465 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3468 assert(V->getType()->isIntegerTy() &&
3469 "Only integer type loads and stores are split");
3470 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3471 "Non-byte-multiple bit width");
3472 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3473 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3474 "extract");
3475 }
3476
3477 if (VecTy)
3478 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3479 if (IntTy && V->getType()->isIntegerTy())
3480 return rewriteIntegerStore(V, SI, AATags);
3481
3482 StoreInst *NewSI;
3483 if (NewBeginOffset == NewAllocaBeginOffset &&
3484 NewEndOffset == NewAllocaEndOffset &&
3488 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3489
3490 NewSI =
3491 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3492 } else {
3493 unsigned AS = SI.getPointerAddressSpace();
3494 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3495 NewSI =
3496 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3497 }
3498 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3499 LLVMContext::MD_access_group});
3500 if (AATags)
3503 if (SI.isVolatile())
3504 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3507
3508 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3511
3512 Pass.DeadInsts.push_back(&SI);
3513 deleteIfTriviallyDead(OldOp);
3514
3518 !SI.isVolatile();
3519 }
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3531 assert(Size > 0 && "Expected a positive number of bytes.");
3533 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3534 if (Size == 1)
3535 return V;
3536
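// Splat the byte: zero-extend it to an integer of Size bytes and multiply by
// the 0x0101...01 pattern so the byte is replicated into every byte position.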
3538 V = IRB.CreateMul(
3539 IRB.CreateZExt(V, SplatIntTy, "zext"),
3542 SplatIntTy)),
3543 "isplat");
3544 return V;
3545 }
3546
3547
3549 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3551 return V;
3552 }
3553
3554 bool visitMemSetInst(MemSetInst &II) {
3556 assert(II.getRawDest() == OldPtr);
3557
3558 AAMDNodes AATags = II.getAAMetadata();
3559
3560
3561
3564 assert(NewBeginOffset == BeginOffset);
3565 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3566 II.setDestAlignment(getSliceAlign());
3567
3568
3569
3571 "AT: Unexpected link to non-const GEP");
3572 deleteIfTriviallyDead(OldPtr);
3573 return false;
3574 }
3575
3576
3577 Pass.DeadInsts.push_back(&II);
3578
3581
3582 const bool CanContinue = [&]() {
3583 if (VecTy || IntTy)
3584 return true;
3585 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3586 return false;
3587
3589 const uint64_t Len = C->getLimitedValue();
3590 if (Len > std::numeric_limits<unsigned>::max())
3591 return false;
3592 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3595 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3596 }();
3597
3598
3599
3600 if (!CanContinue) {
3601 Type *SizeTy = II.getLength()->getType();
3602 unsigned Sz = NewEndOffset - NewBeginOffset;
3603 Constant *Size = ConstantInt::get(SizeTy, Sz);
3605 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3606 MaybeAlign(getSliceAlign()), II.isVolatile()));
3607 if (AATags)
3608 New->setAAMetadata(
3609 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3610
3611 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3612 New, New->getRawDest(), nullptr, DL);
3613
3615 return false;
3616 }
3617
3618
3619
3620
3621
3622
3624
3625 if (VecTy) {
3626
3627 assert(ElementTy == ScalarTy);
3628
3629 unsigned BeginIndex = getIndex(NewBeginOffset);
3630 unsigned EndIndex = getIndex(NewEndOffset);
3631 assert(EndIndex > BeginIndex && "Empty vector!");
3632 unsigned NumElements = EndIndex - BeginIndex;
3634 "Too many elements!");
3635
3637 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3639 if (NumElements > 1)
3641
3643 NewAI.getAlign(), "oldload");
3645 } else if (IntTy) {
3646
3647
3649
3650 uint64_t Size = NewEndOffset - NewBeginOffset;
3651 V = getIntegerSplat(II.getValue(), Size);
3652
3653 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3654 EndOffset != NewAllocaEndOffset)) {
3656 NewAI.getAlign(), "oldload");
3658 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3660 } else {
3661 assert(V->getType() == IntTy &&
3662 "Wrong type for an alloca wide integer!");
3663 }
3665 } else {
3666
3667 assert(NewBeginOffset == NewAllocaBeginOffset);
3668 assert(NewEndOffset == NewAllocaEndOffset);
3669
3670 V = getIntegerSplat(II.getValue(),
3671 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3675
3677 }
3678
3679 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3680 StoreInst *New =
3681 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3682 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3683 LLVMContext::MD_access_group});
3684 if (AATags)
3685 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3687
3688 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3689 New, New->getPointerOperand(), V, DL);
3690
3692 return !II.isVolatile();
3693 }
3694
3695 bool visitMemTransferInst(MemTransferInst &II) {
3696
3697
3698
3700
3701 AAMDNodes AATags = II.getAAMetadata();
3702
3703 bool IsDest = &II.getRawDestUse() == OldUse;
3704 assert((IsDest && II.getRawDest() == OldPtr) ||
3705 (!IsDest && II.getRawSource() == OldPtr));
3706
3707 Align SliceAlign = getSliceAlign();
3708
3709
3710
3711
3712
3713
3714
3715 if (!IsSplittable) {
3716 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3717 if (IsDest) {
3718
3721 DbgAssign->getAddress() == II.getDest())
3722 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3723 }
3724 II.setDest(AdjustedPtr);
3725 II.setDestAlignment(SliceAlign);
3726 } else {
3727 II.setSource(AdjustedPtr);
3728 II.setSourceAlignment(SliceAlign);
3729 }
3730
3732 deleteIfTriviallyDead(OldPtr);
3733 return false;
3734 }
3735
3736
3737
3738
3739
3740
3741
3742
3743 bool EmitMemCpy =
3744 !VecTy && !IntTy &&
3745 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3746 SliceSize !=
3750
3751
3752
3753
3754 if (EmitMemCpy && &OldAI == &NewAI) {
3755
3756 assert(NewBeginOffset == BeginOffset);
3757
3758
3759 if (NewEndOffset != EndOffset)
3760 II.setLength(NewEndOffset - NewBeginOffset);
3761 return false;
3762 }
3763
3764 Pass.DeadInsts.push_back(&II);
3765
3766
3767
3768 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3769 if (AllocaInst *AI =
3771 assert(AI != &OldAI && AI != &NewAI &&
3772 "Splittable transfers cannot reach the same alloca on both ends.");
3773 Pass.Worklist.insert(AI);
3774 }
3775
3778
3779
3780 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3781 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3782 Align OtherAlign =
3783 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3784 OtherAlign =
3785 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3786
3787 if (EmitMemCpy) {
3788
3789
3790 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3791 OtherPtr->getName() + ".");
3792
3793 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3794 Type *SizeTy = II.getLength()->getType();
3795 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3796
3797 Value *DestPtr, *SrcPtr;
3798 MaybeAlign DestAlign, SrcAlign;
3799
3800 if (IsDest) {
3801 DestPtr = OurPtr;
3802 DestAlign = SliceAlign;
3803 SrcPtr = OtherPtr;
3804 SrcAlign = OtherAlign;
3805 } else {
3806 DestPtr = OtherPtr;
3807 DestAlign = OtherAlign;
3808 SrcPtr = OurPtr;
3809 SrcAlign = SliceAlign;
3810 }
3811 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3813 if (AATags)
3814 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3815
3816 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3817 if (IsDest) {
3818 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3819 &II, New, DestPtr, nullptr, DL);
3824 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3825 }
3827 return false;
3828 }
3829
3830 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3831 NewEndOffset == NewAllocaEndOffset;
3832 uint64_t Size = NewEndOffset - NewBeginOffset;
3833 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3834 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3835 unsigned NumElements = EndIndex - BeginIndex;
3836 IntegerType *SubIntTy =
3837 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3838
3839
3840
3841 Type *OtherTy;
3842 if (VecTy && !IsWholeAlloca) {
3843 if (NumElements == 1)
3844 OtherTy = VecTy->getElementType();
3845 else
3847 } else if (IntTy && !IsWholeAlloca) {
3848 OtherTy = SubIntTy;
3849 } else {
3850 OtherTy = NewAllocaTy;
3851 }
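// OtherTy describes the non-alloca side of the transfer: a single element or
// sub-vector for a partial vector alloca, a sub-integer for a partial integer
// alloca, and the whole new alloca type when the full partition is copied.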
3852
3854 OtherPtr->getName() + ".");
3855 MaybeAlign SrcAlign = OtherAlign;
3856 MaybeAlign DstAlign = SliceAlign;
3857 if (!IsDest)
3859
3862
3863 if (IsDest) {
3864 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3865 SrcPtr = AdjPtr;
3866 } else {
3867 DstPtr = AdjPtr;
3868 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3869 }
3870
3872 if (VecTy && !IsWholeAlloca && !IsDest) {
3873 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3875 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3876 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3877 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3880 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3882 } else {
3883 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3884 II.isVolatile(), "copyload");
3885 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3886 LLVMContext::MD_access_group});
3887 if (AATags)
3888 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3891 }
3892
3893 if (VecTy && !IsWholeAlloca && IsDest) {
3895 NewAI.getAlign(), "oldload");
3896 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3897 } else if (IntTy && !IsWholeAlloca && IsDest) {
3899 NewAI.getAlign(), "oldload");
3901 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3904 }
3905
3907 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3908 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3909 LLVMContext::MD_access_group});
3910 if (AATags)
3912 Src->getType(), DL));
3913
3914 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3915 if (IsDest) {
3916
3917 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3918 Store, DstPtr, Src, DL);
3923 &II, Store, DstPtr, Src, DL);
3924 }
3925
3927 return !II.isVolatile();
3928 }
3929
3930 bool visitIntrinsicInst(IntrinsicInst &II) {
3931 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3932 "Unexpected intrinsic!");
3934
3935
3936 Pass.DeadInsts.push_back(&II);
3937
3938 if (II.isDroppable()) {
3939 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3940
3942 return true;
3943 }
3944
3945 assert(II.getArgOperand(0) == OldPtr);
3949 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3950 New = IRB.CreateLifetimeStart(Ptr);
3951 else
3952 New = IRB.CreateLifetimeEnd(Ptr);
3953
3954 (void)New;
3956
3957 return true;
3958 }
3959
3960 void fixLoadStoreAlign(Instruction &Root) {
3961
3962
3963
3964 SmallPtrSet<Instruction *, 4> Visited;
3965 SmallVector<Instruction *, 4> Uses;
3966 Visited.insert(&Root);
3967 Uses.push_back(&Root);
3968 do {
3970
3973 continue;
3974 }
3976 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3977 continue;
3978 }
3979
3983 for (User *U : I->users())
3986 } while (!Uses.empty());
3987 }
3988
3989 bool visitPHINode(PHINode &PN) {
3991 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3992 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3993
3994
3995
3996
3997
3998 IRBuilderBase::InsertPointGuard Guard(IRB);
4000 IRB.SetInsertPoint(OldPtr->getParent(),
4001 OldPtr->getParent()->getFirstInsertionPt());
4002 else
4003 IRB.SetInsertPoint(OldPtr);
4004 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
4005
4006 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4007
4009
4011 deleteIfTriviallyDead(OldPtr);
4012
4013
4014 fixLoadStoreAlign(PN);
4015
4016
4017
4018
4019 PHIUsers.insert(&PN);
4020 return true;
4021 }
4022
4023 bool visitSelectInst(SelectInst &SI) {
4025 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
4026 "Pointer isn't an operand!");
4027 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
4028 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
4029
4030 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4031
4032 if (SI.getOperand(1) == OldPtr)
4033 SI.setOperand(1, NewPtr);
4034 if (SI.getOperand(2) == OldPtr)
4035 SI.setOperand(2, NewPtr);
4036
4038 deleteIfTriviallyDead(OldPtr);
4039
4040
4041 fixLoadStoreAlign(SI);
4042
4043
4044
4045
4046 SelectUsers.insert(&SI);
4047 return true;
4048 }
4049};
4050
4051
4052
4053
4054
4055
4056class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
4057
4058 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4059
4060
4062
4063
4064 SmallPtrSet<User *, 8> Visited;
4065
4066
4067
4069
4070
4071 const DataLayout &DL;
4072
4073 IRBuilderTy &IRB;
4074
4075public:
4076 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
4078
4079
4080
4081 bool rewrite(Instruction &I) {
4082 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
4083 enqueueUsers(I);
4085 while (!Queue.empty()) {
4086 U = Queue.pop_back_val();
4088 }
4090 }
4091
4092private:
4093
4094
4095 void enqueueUsers(Instruction &I) {
4096 for (Use &U : I.uses())
4097 if (Visited.insert(U.getUser()).second)
4098 Queue.push_back(&U);
4099 }
4100
4101
4102 bool visitInstruction(Instruction &I) { return false; }
4103
4104
4105 template <typename Derived> class OpSplitter {
4106 protected:
4107
4108 IRBuilderTy &IRB;
4109
4110
4111
4112 SmallVector<unsigned, 4> Indices;
4113
4114
4115
4117
4118
4119
4121
4122
4123 Type *BaseTy;
4124
4125
4126 Align BaseAlign;
4127
4128
4129
4130 const DataLayout &DL;
4131
4132
4133
4134 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4135 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4136 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4137 BaseAlign(BaseAlign), DL(DL) {
4138 IRB.SetInsertPoint(InsertionPoint);
4139 }
4140
4141 public:
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4157 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4158 return static_cast<Derived *>(this)->emitFunc(
4160 }
4161
4163 unsigned OldSize = Indices.size();
4164 (void)OldSize;
4165 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4166 ++Idx) {
4167 assert(Indices.size() == OldSize && "Did not return to the old size");
4169 GEPIndices.push_back(IRB.getInt32(Idx));
4170 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4173 }
4174 return;
4175 }
4176
4178 unsigned OldSize = Indices.size();
4179 (void)OldSize;
4180 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4181 ++Idx) {
4182 assert(Indices.size() == OldSize && "Did not return to the old size");
4184 GEPIndices.push_back(IRB.getInt32(Idx));
4185 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4188 }
4189 return;
4190 }
4191
4192 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4193 }
4194 };
4195
4196 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
4197 AAMDNodes AATags;
4198
4199
4201
4202
4204
4205 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4206 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4207 IRBuilderTy &IRB)
4208 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4209 IRB),
4210 AATags(AATags) {}
4211
4212
4213
4214 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4216
4218 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4219 LoadInst *Load =
4220 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4221
4224 if (AATags &&
4226 Load->setAAMetadata(
4228
4229
4231
4232 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4234 }
4235
4236
4237 void recordFakeUses(LoadInst &LI) {
4238 for (Use &U : LI.uses())
4240 if (II->getIntrinsicID() == Intrinsic::fake_use)
4242 }
4243
4244
4245
4246 void emitFakeUses() {
4247 for (Instruction *I : FakeUses) {
4248 IRB.SetInsertPoint(I);
4249 for (auto *V : Components)
4250 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4251 I->eraseFromParent();
4252 }
4253 }
4254 };
4255
4256 bool visitLoadInst(LoadInst &LI) {
4259 return false;
4260
4261
4265 Splitter.recordFakeUses(LI);
4267 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4268 Splitter.emitFakeUses();
4269 Visited.erase(&LI);
4272 return true;
4273 }
4274
4275 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4276 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4277 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4278 const DataLayout &DL, IRBuilderTy &IRB)
4279 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4280 DL, IRB),
4281 AATags(AATags), AggStore(AggStore) {}
4282 AAMDNodes AATags;
4283 StoreInst *AggStore;
4284
4285
4286 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4288
4289
4290
4291
4292 Value *ExtractValue =
4293 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4294 Value *InBoundsGEP =
4295 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4296 StoreInst *Store =
4297 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4298
4302 if (AATags) {
4305 }
4306
4307
4308
4309
4310
4313 uint64_t SizeInBits =
4314 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4316 SizeInBits, AggStore, Store,
4317 Store->getPointerOperand(), Store->getValueOperand(),
4318 DL);
4319 } else {
4321 "AT: unexpected debug.assign linked to store through "
4322 "unbounded GEP");
4323 }
4325 }
4326 };
4327
4328 bool visitStoreInst(StoreInst &SI) {
4329 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4330 return false;
4331 Value *V = SI.getValueOperand();
4332 if (V->getType()->isSingleValueType())
4333 return false;
4334
4335
4337 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4339 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4340 Visited.erase(&SI);
4341
4342
4344 SI.eraseFromParent();
4345 return true;
4346 }
4347
4348 bool visitBitCastInst(BitCastInst &BC) {
4349 enqueueUsers(BC);
4350 return false;
4351 }
4352
4353 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4354 enqueueUsers(ASC);
4355 return false;
4356 }
4357
4358
4359
4360
4361
4362
4363 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4364
4365
4369 if (Sel)
4370 return false;
4371
4372 Sel = SI;
4375 return false;
4376 continue;
4377 }
4379 if (Sel)
4380 return false;
4381 Sel = ZI;
4382 if (!ZI->getSrcTy()->isIntegerTy(1))
4383 return false;
4384 continue;
4385 }
4386
4388 return false;
4389 }
4390
4391 if (!Sel)
4392 return false;
4393
4394 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4395 dbgs() << " original: " << *Sel << "\n";
4396 dbgs() << " " << GEPI << "\n";);
4397
4398 auto GetNewOps = [&](Value *SelOp) {
4401 if (Op == Sel)
4403 else
4405 return NewOps;
4406 };
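// GetNewOps clones the GEP operand list with the select replaced by one of
// its arms; one GEP is then built per arm and a fresh select picks between
// the two results, leaving plain GEPs for SROA to analyze.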
4407
4411 Cond = SI->getCondition();
4412 True = SI->getTrueValue();
4413 False = SI->getFalseValue();
4415 MDFrom = SI;
4416 } else {
4417 Cond = Sel->getOperand(0);
4418 True = ConstantInt::get(Sel->getType(), 1);
4419 False = ConstantInt::get(Sel->getType(), 0);
4420 }
4423
4424 IRB.SetInsertPoint(&GEPI);
4426
4428 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4429 True->getName() + ".sroa.gep", NW);
4430
4432 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4433 False->getName() + ".sroa.gep", NW);
4434
4435 Value *NSel = MDFrom
4436 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4437 Sel->getName() + ".sroa.sel", MDFrom)
4438 : IRB.CreateSelectWithUnknownProfile(
4440 Sel->getName() + ".sroa.sel");
4441 Visited.erase(&GEPI);
4445 Visited.insert(NSelI);
4446 enqueueUsers(*NSelI);
4447
4449 dbgs() << " " << *NFalse << "\n";
4450 dbgs() << " " << *NSel << "\n";);
4451
4452 return true;
4453 }
4454
4455
4456
4457
4458
4459 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4460
4461
4462
4464 auto IsInvalidPointerOperand = [](Value *V) {
4465 if (!isa<Instruction>(V))
4466 return false;
4467 if (auto *AI = dyn_cast<AllocaInst>(V))
4468 return !AI->isStaticAlloca();
4469 return true;
4470 };
4471 if (Phi) {
4472 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4473 return false;
4474 } else {
4476 return false;
4477 }
4478
4479
4482 if (Phi)
4483 return false;
4484
4486 if ((Phi->incoming_values(),
4487 [](Value *V) { return isa(V); }))
4488 return false;
4489 continue;
4490 }
4491
4493 return false;
4494 }
4495
4496 if (!Phi)
4497 return false;
4498
4499 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4500 dbgs() << " original: " << *Phi << "\n";
4501 dbgs() << " " << GEPI << "\n";);
4502
4503 auto GetNewOps = [&](Value *PhiOp) {
4506 if (Op == Phi)
4508 else
4510 return NewOps;
4511 };
4512
4513 IRB.SetInsertPoint(Phi);
4514 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4515 Phi->getName() + ".sroa.phi");
4516
4518
4519
4521 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4527 } else {
4529 NewGEP =
4530 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4532 }
4534 }
4535
4536 Visited.erase(&GEPI);
4539 Visited.insert(NewPhi);
4540 enqueueUsers(*NewPhi);
4541
4545 << "\n " << *In;
4546 dbgs() << "\n " << *NewPhi << '\n');
4547
4548 return true;
4549 }
4550
4551 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4552 if (unfoldGEPSelect(GEPI))
4553 return true;
4554
4555 if (unfoldGEPPhi(GEPI))
4556 return true;
4557
4558 enqueueUsers(GEPI);
4559 return false;
4560 }
4561
4562 bool visitPHINode(PHINode &PN) {
4563 enqueueUsers(PN);
4564 return false;
4565 }
4566
4567 bool visitSelectInst(SelectInst &SI) {
4568 enqueueUsers(SI);
4569 return false;
4570 }
4571};
4572
4573}
4574
4575
4576
4577
4578
4579
4581 if (Ty->isSingleValueType())
4582 return Ty;
4583
4584 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4586
4587 Type *InnerTy;
4589 InnerTy = ArrTy->getElementType();
4593 InnerTy = STy->getElementType(Index);
4594 } else {
4595 return Ty;
4596 }
4597
4598 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4599 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4600 return Ty;
4601
4603}
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4620 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4622 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4623 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4624 return nullptr;
4625
4627 Type *ElementTy;
4630 ElementTy = AT->getElementType();
4631 TyNumElements = AT->getNumElements();
4632 } else {
4633
4634
4636 ElementTy = VT->getElementType();
4637 TyNumElements = VT->getNumElements();
4638 }
4639 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4641 if (NumSkippedElements >= TyNumElements)
4642 return nullptr;
4643 Offset -= NumSkippedElements * ElementSize;
4644
4645
4646 if (Offset > 0 || Size < ElementSize) {
4647
4649 return nullptr;
4650
4652 }
4654
4655 if (Size == ElementSize)
4659 if (NumElements * ElementSize != Size)
4660 return nullptr;
4662 }
4663
4665 if (!STy)
4666 return nullptr;
4667
4669
4671 return nullptr;
4672
4674 return nullptr;
4677 return nullptr;
4678
4681
4683 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4684 if (Offset >= ElementSize)
4685 return nullptr;
4686
4687
4688 if (Offset > 0 || Size < ElementSize) {
4690 return nullptr;
4692 }
4694
4695 if (Size == ElementSize)
4697
4702 if (Index == EndIndex)
4703 return nullptr;
4704
4705
4706
4707
4708
4710 return nullptr;
4711
4712 assert(Index < EndIndex);
4714 }
4715
4716
4719 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4721 return nullptr;
4722
4723 return SubTy;
4724}
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4752 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
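// Roughly: pre-splitting carves integer loads and stores that straddle
// partition boundaries into partition-sized pieces, so each partition can
// later be rewritten and promoted on its own. Only simple loads whose users
// are all simple stores (and the stores of such loads) are candidates.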
4753
4754
4755
4756
4757
4760
4761
4762
4763
4764
4765 struct SplitOffsets {
4766 Slice *S;
4767 std::vector<uint64_t> Splits;
4768 };
4769 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4783
4784 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4785 for (auto &P : AS.partitions()) {
4786 for (Slice &S : P) {
4788 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4789
4790
4791
4793 UnsplittableLoads.insert(LI);
4796 UnsplittableLoads.insert(LI);
4797 continue;
4798 }
4799 assert(P.endOffset() > S.beginOffset() &&
4800 "Empty or backwards partition!");
4801
4802
4804 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4805
4806
4807
4808
4809 auto IsLoadSimplyStored = [](LoadInst *LI) {
4810 for (User *LU : LI->users()) {
4812 if (!SI || !SI->isSimple())
4813 return false;
4814 }
4815 return true;
4816 };
4817 if (!IsLoadSimplyStored(LI)) {
4818 UnsplittableLoads.insert(LI);
4819 continue;
4820 }
4821
4824 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4825
4826 continue;
4828 if (!StoredLoad || !StoredLoad->isSimple())
4829 continue;
4830 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4831
4833 } else {
4834
4835 continue;
4836 }
4837
4838
4840 auto &Offsets = SplitOffsetsMap[I];
4842 "Should not have splits the first time we see an instruction!");
4844 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4845 }
4846
4847
4848
4849 for (Slice *S : P.splitSliceTails()) {
4850 auto SplitOffsetsMapI =
4852 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4853 continue;
4854 auto &Offsets = SplitOffsetsMapI->second;
4855
4856 assert(Offsets.S == S && "Found a mismatched slice!");
4858 "Cannot have an empty set of splits on the second partition!");
4860 P.beginOffset() - Offsets.S->beginOffset() &&
4861 "Previous split does not end where this one begins!");
4862
4863
4864
4865 if (S->endOffset() > P.endOffset())
4866 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4867 }
4868 }
4869
4870
4871
4872
4873
4874 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4875
4876
4878
4879
4880 if (UnsplittableLoads.count(LI))
4881 return true;
4882
4883 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4884 if (LoadOffsetsI == SplitOffsetsMap.end())
4885 return false;
4886 auto &LoadOffsets = LoadOffsetsI->second;
4887
4888
4889 auto &StoreOffsets = SplitOffsetsMap[SI];
4890
4891
4892
4893
4894 if (LoadOffsets.Splits == StoreOffsets.Splits)
4895 return false;
4896
4897 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4898 << " " << *LI << "\n"
4899 << " " << *SI << "\n");
4900
4901
4902
4903
4904
4905 UnsplittableLoads.insert(LI);
4906 return true;
4907 });
4908
4909
4910
4911
4912 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4914 return UnsplittableLoads.count(LI);
4915 });
4916
4917
4918 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4919 return UnsplittableLoads.count(LI);
4920 });
4921
4922
4923
4924 if (Loads.empty() && Stores.empty())
4925 return false;
4926
4927
4928
4929 IRBuilderTy IRB(&AI);
4930
4931
4933
4934
4935
4936 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4947 std::vector<LoadInst *> SplitLoads;
4948 const DataLayout &DL = AI.getDataLayout();
4949 for (LoadInst *LI : Loads) {
4950 SplitLoads.clear();
4951
4952 auto &Offsets = SplitOffsetsMap[LI];
4953 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4955 "Load must have type size equal to store size");
4957 "Load must be >= slice size");
4958
4959 uint64_t BaseOffset = Offsets.S->beginOffset();
4960 assert(BaseOffset + SliceSize > BaseOffset &&
4961 "Cannot represent alloca access size using 64-bit integers!");
4962
4964 IRB.SetInsertPoint(LI);
4965
4966 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4967
4968 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4969 int Idx = 0, Size = Offsets.Splits.size();
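// Offsets.Splits holds the interior split points relative to the slice start;
// each iteration emits one iN load of PartSize bytes at PartOffset, then
// advances to the next split, with the final part extending to SliceSize.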
4970 for (;;) {
4971 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4974 LoadInst *PLoad = IRB.CreateAlignedLoad(
4975 PartTy,
4977 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4978 PartPtrTy, BasePtr->getName() + "."),
4980 false, LI->getName());
4981 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4982 LLVMContext::MD_access_group});
4983
4984
4985
4986 SplitLoads.push_back(PLoad);
4987
4988
4990 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4992 false, nullptr));
4993 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4994 << ", " << NewSlices.back().endOffset()
4995 << "): " << *PLoad << "\n");
4996
4997
4998 if (Idx >= Size)
4999 break;
5000
5001
5002 PartOffset = Offsets.Splits[Idx];
5003 ++Idx;
5004 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
5005 }
5006
5007
5008
5009
5010 bool DeferredStores = false;
5011 for (User *LU : LI->users()) {
5013 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
5014 DeferredStores = true;
5015 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
5016 << "\n");
5017 continue;
5018 }
5019
5020 Value *StoreBasePtr = SI->getPointerOperand();
5021 IRB.SetInsertPoint(SI);
5022 AAMDNodes AATags = SI->getAAMetadata();
5023
5024 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
5025
5026 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
5027 LoadInst *PLoad = SplitLoads[Idx];
5028 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
5029 auto *PartPtrTy = SI->getPointerOperandType();
5030
5031 auto AS = SI->getPointerAddressSpace();
5032 StoreInst *PStore = IRB.CreateAlignedStore(
5033 PLoad,
5035 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5036 PartPtrTy, StoreBasePtr->getName() + "."),
5038 false);
5039 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5040 LLVMContext::MD_access_group,
5041 LLVMContext::MD_DIAssignID});
5042
5043 if (AATags)
5046 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
5047 }
5048
5049
5050
5051
5052
5054 ResplitPromotableAllocas.insert(OtherAI);
5055 Worklist.insert(OtherAI);
5058 Worklist.insert(OtherAI);
5059 }
5060
5061
5062 DeadInsts.push_back(SI);
5063 }
5064
5065
5066 if (DeferredStores)
5067 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5068
5069
5070 DeadInsts.push_back(LI);
5072 }
5073
5074
5075
5076
5077
5078
5079 for (StoreInst *SI : Stores) {
5083 uint64_t StoreSize = Ty->getBitWidth() / 8;
5084 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5085
5086 auto &Offsets = SplitOffsetsMap[SI];
5088 "Slice size should always match load size exactly!");
5089 uint64_t BaseOffset = Offsets.S->beginOffset();
5090 assert(BaseOffset + StoreSize > BaseOffset &&
5091 "Cannot represent alloca access size using 64-bit integers!");
5092
5095
5096 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
5097
5098
5099 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5100 std::vector<LoadInst *> *SplitLoads = nullptr;
5101 if (SplitLoadsMapI != SplitLoadsMap.end()) {
5102 SplitLoads = &SplitLoadsMapI->second;
5103 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
5104 "Too few split loads for the number of splits in the store!");
5105 } else {
5107 }
5108
5109 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5110 int Idx = 0, Size = Offsets.Splits.size();
5111 for (;;) {
5112 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5114 auto *StorePartPtrTy = SI->getPointerOperandType();
5115
5116
5117 LoadInst *PLoad;
5118 if (SplitLoads) {
5119 PLoad = (*SplitLoads)[Idx];
5120 } else {
5121 IRB.SetInsertPoint(LI);
5123 PLoad = IRB.CreateAlignedLoad(
5124 PartTy,
5126 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5127 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5129 false, LI->getName());
5130 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5131 LLVMContext::MD_access_group});
5132 }
5133
5134
5135 IRB.SetInsertPoint(SI);
5136 auto AS = SI->getPointerAddressSpace();
5137 StoreInst *PStore = IRB.CreateAlignedStore(
5138 PLoad,
5140 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5141 StorePartPtrTy, StoreBasePtr->getName() + "."),
5143 false);
5144 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5145 LLVMContext::MD_access_group});
5146
5147
5148
5149
5151 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5153 false, nullptr));
5154 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5155 << ", " << NewSlices.back().endOffset()
5156 << "): " << *PStore << "\n");
5157 if (!SplitLoads) {
5158 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5159 }
5160
5161
5162 if (Idx >= Size)
5163 break;
5164
5165
5166 PartOffset = Offsets.Splits[Idx];
5167 ++Idx;
5168 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5169 }
5170
5171
5172
5173
5174
5175
5176 if (!SplitLoads) {
5178 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5179 ResplitPromotableAllocas.insert(OtherAI);
5180 Worklist.insert(OtherAI);
5183 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5184 Worklist.insert(OtherAI);
5185 }
5186 }
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5198 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5199 DeadInsts.push_back(LI);
5200 }
5201 DeadInsts.push_back(SI);
5203 }
5204
5205
5206 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5207
5208
5209
5210 AS.insert(NewSlices);
5211
5213#ifndef NDEBUG
5214 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5216#endif
5217
5218
5219
5220 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5221
5222 return true;
5223}
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5236 Partition &P) {
5237
5238
5239
5240 Type *SliceTy = nullptr;
5241 const DataLayout &DL = AI.getDataLayout();
5242 unsigned VScale = AI.getFunction()->getVScaleValue();
5243
5244 std::pair<Type *, IntegerType *> CommonUseTy =
5246
5247 if (CommonUseTy.first) {
5248 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
5250 SliceTy = CommonUseTy.first;
5251 }
5252
5253 if (!SliceTy)
5255 P.beginOffset(), P.size()))
5256 SliceTy = TypePartitionTy;
5257
5258
5259 if (!SliceTy && CommonUseTy.second)
5260 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())
5261 SliceTy = CommonUseTy.second;
5262 if ((!SliceTy || (SliceTy->isArrayTy() &&
5264 DL.isLegalInteger(P.size() * 8)) {
5265 SliceTy = Type::getIntNTy(*C, P.size() * 8);
5266 }
5267
5268 if (!SliceTy)
5269 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
5270 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
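// SliceTy falls back through: a common use type that covers the partition, a
// type carved from the original alloca type at this offset/size, a common
// integer use type, a legal iN of the partition width, and finally a plain
// [P.size() x i8] array.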
5271
5273
5276 if (VecTy)
5277 SliceTy = VecTy;
5278
5279
5280
5281
5282
5283
5284
5285 AllocaInst *NewAI;
5286 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5287 NewAI = &AI;
5288
5289
5290
5291 } else {
5292
5294
5295
5296 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
5297 NewAI = new AllocaInst(
5298 SliceTy, AI.getAddressSpace(), nullptr,
5299 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
5300 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5301 AI.getIterator());
5302
5304 ++NumNewAllocas;
5305 }
5306
5307 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5308 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5309
5310
5311
5312
5313 unsigned PPWOldSize = PostPromotionWorklist.size();
5314 unsigned NumUses = 0;
5315 SmallSetVector<PHINode *, 8> PHIUsers;
5316 SmallSetVector<SelectInst *, 8> SelectUsers;
5317
5318 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5319 P.endOffset(), IsIntegerPromotable, VecTy,
5320 PHIUsers, SelectUsers);
5321 bool Promotable = true;
5322
5323 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5324 NumUses += DeletedValues->size() + 1;
5325 for (Value *V : *DeletedValues)
5326 DeadInsts.push_back(V);
5327 } else {
5328 for (Slice *S : P.splitSliceTails()) {
5329 Promotable &= Rewriter.visit(S);
5330 ++NumUses;
5331 }
5332 for (Slice &S : P) {
5333 Promotable &= Rewriter.visit(&S);
5334 ++NumUses;
5335 }
5336 }
5337
5338 NumAllocaPartitionUses += NumUses;
5339 MaxUsesPerAllocaPartition.updateMax(NumUses);
5340
5341
5342
5343 for (PHINode *PHI : PHIUsers)
5345 Promotable = false;
5346 PHIUsers.clear();
5347 SelectUsers.clear();
5348 break;
5349 }
5350
5352 NewSelectsToRewrite;
5353 NewSelectsToRewrite.reserve(SelectUsers.size());
5354 for (SelectInst *Sel : SelectUsers) {
5355 std::optional Ops =
5356 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5357 if (!Ops) {
5358 Promotable = false;
5359 PHIUsers.clear();
5360 SelectUsers.clear();
5361 NewSelectsToRewrite.clear();
5362 break;
5363 }
5364 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5365 }
5366
5367 if (Promotable) {
5368 for (Use *U : AS.getDeadUsesIfPromotable()) {
5370 Value::dropDroppableUse(*U);
5371 if (OldInst)
5373 DeadInsts.push_back(OldInst);
5374 }
5375 if (PHIUsers.empty() && SelectUsers.empty()) {
5376
5377 PromotableAllocas.insert(NewAI);
5378 } else {
5379
5380
5381
5382 SpeculatablePHIs.insert_range(PHIUsers);
5383 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5384 NewSelectsToRewrite.size());
5386 std::make_move_iterator(NewSelectsToRewrite.begin()),
5387 std::make_move_iterator(NewSelectsToRewrite.end())))
5388 SelectsToRewrite.insert(std::move(KV));
5389 Worklist.insert(NewAI);
5390 }
5391 } else {
5392
5393 while (PostPromotionWorklist.size() > PPWOldSize)
5394 PostPromotionWorklist.pop_back();
5395
5396
5397
5398 if (NewAI == &AI)
5399 return nullptr;
5400
5401
5402
5403
5404 Worklist.insert(NewAI);
5405 }
5406
5407 return NewAI;
5408}
5409
5410
5411
5417
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5451 int64_t BitExtractOffset) {
5453 bool HasFragment = false;
5454 bool HasBitExtract = false;
5455
5458 HasFragment = true;
5459 continue;
5460 }
5463 HasBitExtract = true;
5464 int64_t ExtractOffsetInBits = Op.getArg(0);
5465 int64_t ExtractSizeInBits = Op.getArg(1);
5466
5467
5468
5469
5470
5472 return nullptr;
5473
5474 assert(BitExtractOffset <= 0);
5475 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5476
5477
5478
5479
5480
5481 if (AdjustedOffset < 0)
5482 return nullptr;
5483
5484 Ops.push_back(Op.getOp());
5485 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5486 Ops.push_back(ExtractSizeInBits);
5487 continue;
5488 }
5490 }
5491
5492
5493
5494 if (HasFragment && HasBitExtract)
5495 return nullptr;
5496
5497 if (!HasBitExtract) {
5501 }
5503}
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513static void
5516 std::optionalDIExpression::FragmentInfo NewFragment,
5517 int64_t BitExtractAdjustment) {
5518 (void)DIB;
5519
5520
5521
5522
5525 if (NewFragment)
5527 BitExtractAdjustment);
5528 if (!NewFragmentExpr)
5529 return;
5530
5534 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5536 return;
5537 }
5538
5542
5543
5544
5547 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5549 return;
5550 }
5551
5552
5553 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5554 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5556 }
5557
5559 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5561 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5562 (void)NewAssign;
5563}
5564
5565
5566
5567bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5568 if (AS.begin() == AS.end())
5569 return false;
5570
5571 unsigned NumPartitions = 0;
5573 const DataLayout &DL = AI.getModule()->getDataLayout();
5574
5575
5576 Changed |= presplitLoadsAndStores(AI, AS);
5577
5578
5579
5580
5581
5582
5583
5584 bool IsSorted = true;
5585
5586 uint64_t AllocaSize =
5587 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5588 const uint64_t MaxBitVectorSize = 1024;
5589 if (AllocaSize <= MaxBitVectorSize) {
5590
5591
5592 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5593 for (Slice &S : AS)
5594 for (unsigned O = S.beginOffset() + 1;
5595 O < S.endOffset() && O < AllocaSize; O++)
5596 SplittableOffset.reset(O);
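// SplittableOffset now has a bit cleared for every byte strictly interior to
// some slice; a splittable slice whose begin or end lands on such a byte is
// made unsplittable below so partition boundaries stay consistent.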
5597
5598 for (Slice &S : AS) {
5599 if (!S.isSplittable())
5600 continue;
5601
5602 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5603 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5604 continue;
5605
5608 S.makeUnsplittable();
5609 IsSorted = false;
5610 }
5611 }
5612 } else {
5613
5614
5615 for (Slice &S : AS) {
5616 if (!S.isSplittable())
5617 continue;
5618
5619 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5620 continue;
5621
5624 S.makeUnsplittable();
5625 IsSorted = false;
5626 }
5627 }
5628 }
5629
5630 if (!IsSorted)
5632
5633
5634
5635 struct Fragment {
5636 AllocaInst *Alloca;
5638 uint64_t Size;
5639 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5641 };
5643
5644
5645 for (auto &P : AS.partitions()) {
5646 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5647 Changed = true;
5648 if (NewAI != &AI) {
5649 uint64_t SizeOfByte = 8;
5650 uint64_t AllocaSize =
5651 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5652
5653 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5654 Fragments.push_back(
5655 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5656 }
5657 }
5658 ++NumPartitions;
5659 }
5660
5661 NumAllocaPartitions += NumPartitions;
5662 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5663
5664
5665
5666 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5667
5669 return;
5670
5671 const Value *DbgPtr = DbgVariable->getAddress();
5673 DbgVariable->getFragmentOrEntireVariable();
5674
5675
5676 int64_t CurrentExprOffsetInBytes = 0;
5677 SmallVector<uint64_t> PostOffsetOps;
5679 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5680 return;
5681
5682
5683 int64_t ExtractOffsetInBits = 0;
5687 ExtractOffsetInBits = Op.getArg(0);
5688 break;
5689 }
5690 }
5691
5692 DIBuilder DIB(*AI.getModule(), false);
5693 for (auto Fragment : Fragments) {
5694 int64_t OffsetFromLocationInBits;
5695 std::optionalDIExpression::FragmentInfo NewDbgFragment;
5696
5697
5698
5700 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5701 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5702 NewDbgFragment, OffsetFromLocationInBits))
5703 continue;
5704
5705
5706
5707
5708 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5709 continue;
5710
5711
5712
5713 if (!NewDbgFragment)
5714 NewDbgFragment = DbgVariable->getFragment();
5715
5716
5717
5718 int64_t OffestFromNewAllocaInBits =
5719 OffsetFromLocationInBits - ExtractOffsetInBits;
5720
5721
5722 int64_t BitExtractOffset =
5723 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5724
5725
5726
5727
5728 OffestFromNewAllocaInBits =
5729 std::max(int64_t(0), OffestFromNewAllocaInBits);
5730
5731
5732
5733
5734
5735 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5736 if (OffestFromNewAllocaInBits > 0) {
5737 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5739 }
5740
5741
5742
5743 auto RemoveOne = [DbgVariable](auto *OldDII) {
5744 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5745 return LHS->getVariable() == RHS->getVariable() &&
5746 LHS->getDebugLoc()->getInlinedAt() ==
5747 RHS->getDebugLoc()->getInlinedAt();
5748 };
5749 if (SameVariableFragment(OldDII, DbgVariable))
5750 OldDII->eraseFromParent();
5751 };
5754 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5755 NewDbgFragment, BitExtractOffset);
5756 }
5757 };
5758
5759
5760
5764
5766}
5767
5768
5769void SROA::clobberUse(Use &U) {
5771
5773
5774
5775
5776
5779 DeadInsts.push_back(OldI);
5780 }
5781}
5782
5783
5785public:
5792
5796
5797private:
5798 Type *ZeroType;
5799};
5800
5801bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5802
5803
5804
5805
5806 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5807 bool AllSameAndValid = true;
5808 Type *PartitionType = nullptr;
5810 uint64_t BeginOffset = 0;
5811 uint64_t EndOffset = 0;
5812
5813 auto Flush = [&]() {
5814 if (AllSameAndValid && !Insts.empty()) {
5815 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5816 << EndOffset << ")\n");
5818 SSAUpdater SSA(&NewPHIs);
5820 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5821 Promoter.run(Insts);
5822 }
5823 AllSameAndValid = true;
5824 PartitionType = nullptr;
5826 };
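// Flush promotes the run of instructions gathered for the current
// [BeginOffset, EndOffset) range: if every access was simple and used one
// common type, a BasicLoadAndStorePromoter rewrites the loads in terms of the
// stored values via SSAUpdater.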
5827
5828 for (Slice &S : AS) {
5832 dbgs() << "Ignoring slice: ";
5833 AS.print(dbgs(), &S);
5834 });
5835 continue;
5836 }
5837 if (S.beginOffset() >= EndOffset) {
5838 Flush();
5839 BeginOffset = S.beginOffset();
5840 EndOffset = S.endOffset();
5841 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5842 if (AllSameAndValid) {
5844 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5845 << EndOffset << ")";
5846 AS.print(dbgs(), &S);
5847 });
5848 AllSameAndValid = false;
5849 }
5850 EndOffset = std::max(EndOffset, S.endOffset());
5851 continue;
5852 }
5853
5856
5857 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5858 AllSameAndValid = false;
5859 PartitionType = UserTy;
5862 Type *UserTy = SI->getValueOperand()->getType();
5863 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5864 AllSameAndValid = false;
5865 PartitionType = UserTy;
5867 } else {
5868 AllSameAndValid = false;
5869 }
5870 }
5871
5872 Flush();
5873 return true;
5874}
5875
5876
5877
5878
5879
5880
5881std::pair<bool , bool >
5882SROA::runOnAlloca(AllocaInst &AI) {
5884 bool CFGChanged = false;
5885
5886 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5887 ++NumAllocasAnalyzed;
5888
5889
5890 if (AI.use_empty()) {
5891 AI.eraseFromParent();
5893 return {Changed, CFGChanged};
5894 }
5895 const DataLayout &DL = AI.getDataLayout();
5896
5897
5898 auto *AT = AI.getAllocatedType();
5899 TypeSize Size = DL.getTypeAllocSize(AT);
5900 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5901 Size.getFixedValue() == 0)
5902 return {Changed, CFGChanged};
5903
5904
5905
5906 IRBuilderTy IRB(&AI);
5907 AggLoadStoreRewriter AggRewriter(DL, IRB);
5908 Changed |= AggRewriter.rewrite(AI);
5909
5910
5911 AllocaSlices AS(DL, AI);
5913 if (AS.isEscaped())
5914 return {Changed, CFGChanged};
5915
5916 if (AS.isEscapedReadOnly()) {
5917 Changed |= propagateStoredValuesToLoads(AI, AS);
5918 return {Changed, CFGChanged};
5919 }
5920
5921 for (auto &P : AS.partitions()) {
5922
5923
5924
5925
5926
5927 std::optional<Value *> ProtectedFieldDisc;
5928 auto SliceHasMismatch = [&](Slice &S) {
5930 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
5931 II->getIntrinsicID() == Intrinsic::lifetime_end)
5932 return false;
5933 if (!ProtectedFieldDisc)
5934 ProtectedFieldDisc = S.ProtectedFieldDisc;
5935 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5936 };
5937 for (Slice &S : P)
5938 if (SliceHasMismatch(S))
5939 return {Changed, CFGChanged};
5940 for (Slice *S : P.splitSliceTails())
5941 if (SliceHasMismatch(*S))
5942 return {Changed, CFGChanged};
5943 }
5944
5945
5946 for (Instruction *DeadUser : AS.getDeadUsers()) {
5947
5948 for (Use &DeadOp : DeadUser->operands())
5949 clobberUse(DeadOp);
5950
5951
5952 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5953
5954
5955 DeadInsts.push_back(DeadUser);
5957 }
5958 for (Use *DeadOp : AS.getDeadOperands()) {
5959 clobberUse(*DeadOp);
5961 }
5962 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
5963 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
5964
5965 DeadInsts.push_back(PFPUser);
5967 }
5968
5969
5970 if (AS.begin() == AS.end())
5971 return {Changed, CFGChanged};
5972
5973 Changed |= splitAlloca(AI, AS);
5974
5976 while (!SpeculatablePHIs.empty())
5978
5980 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5981 while (!RemainingSelectsToRewrite.empty()) {
5982 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5983 CFGChanged |=
5985 }
5986
5987 return {Changed, CFGChanged};
5988}
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999bool SROA::deleteDeadInstructions(
6000 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6002 while (!DeadInsts.empty()) {
6004 if (!I)
6005 continue;
6006 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6007
6008
6009
6010
6012 DeletedAllocas.insert(AI);
6014 OldDII->eraseFromParent();
6015 }
6016
6019
6020 for (Use &Operand : I->operands())
6022
6023 Operand = nullptr;
6025 DeadInsts.push_back(U);
6026 }
6027
6028 ++NumDeleted;
6029 I->eraseFromParent();
6031 }
6033}
6034
6035
6036
6037
6038
6039bool SROA::promoteAllocas() {
6040 if (PromotableAllocas.empty())
6041 return false;
6042
6044 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
6045 } else {
6046 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
6047 NumPromoted += PromotableAllocas.size();
6048 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6049 }
6050
6051 PromotableAllocas.clear();
6052 return true;
6053}
6054
6055std::pair<bool , bool > SROA::runSROA(Function &F) {
6056 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
6057
6058 const DataLayout &DL = F.getDataLayout();
6059 BasicBlock &EntryBB = F.getEntryBlock();
6063 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6065 PromotableAllocas.insert(AI);
6066 else
6067 Worklist.insert(AI);
6068 }
6069 }
6070
6072 bool CFGChanged = false;
6073
6074
6075 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6076
6077 do {
6078 while (!Worklist.empty()) {
6079 auto [IterationChanged, IterationCFGChanged] =
6080 runOnAlloca(*Worklist.pop_back_val());
6081 Changed |= IterationChanged;
6082 CFGChanged |= IterationCFGChanged;
6083
6084 Changed |= deleteDeadInstructions(DeletedAllocas);
6085
6086
6087
6088 if (!DeletedAllocas.empty()) {
6089 Worklist.set_subtract(DeletedAllocas);
6090 PostPromotionWorklist.set_subtract(DeletedAllocas);
6091 PromotableAllocas.set_subtract(DeletedAllocas);
6092 DeletedAllocas.clear();
6093 }
6094 }
6095
6096 Changed |= promoteAllocas();
6097
6098 Worklist = PostPromotionWorklist;
6099 PostPromotionWorklist.clear();
6100 } while (!Worklist.empty());
6101
6102 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6104 "Should not have modified the CFG when told to preserve it.");
6105
6107 for (auto &BB : F) {
6109 }
6110 }
6111
6112 return {Changed, CFGChanged};
6113}
6114
6115 PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6116   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6117   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6118   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6119   auto [Changed, CFGChanged] =
6120       SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6121   if (!Changed)
6122     return PreservedAnalyses::all();
6123   PreservedAnalyses PA;
6124   if (!CFGChanged)
6125     PA.preserveSet<CFGAnalyses>();
6126   PA.preserve<DominatorTreeAnalysis>();
6127   return PA;
6128 }
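// Editor's note (illustrative sketch, not part of SROA.cpp): with the new pass
// manager the pass is scheduled like any other function pass, e.g.:
//
//   FunctionPassManager FPM;
//   FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
//   PreservedAnalyses PA = FPM.run(F, FAM); // FAM: a populated FunctionAnalysisManager
//
// or requested from the opt command line as -passes='sroa<preserve-cfg>' or
// -passes='sroa<modify-cfg>'.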
6129
6130 void SROAPass::printPipeline(
6131     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6132   static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6133       OS, MapClassName2PassName);
6134   OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6135                                                  : "<modify-cfg>");
6136 }
6137
6138 SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
6139
6140namespace {
6141
6142 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6143 class SROALegacyPass : public FunctionPass {
6144   SROAOptions PreserveCFG;
6145
6146 public:
6147   static char ID;
6148
6149   SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
6150       : FunctionPass(ID), PreserveCFG(PreserveCFG) {
6151     initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
6152   }
6153
6154   bool runOnFunction(Function &F) override {
6155     if (skipFunction(F))
6156       return false;
6157
6158     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6159     AssumptionCache &AC =
6160         getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6161     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6162     auto [Changed, _] =
6163         SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6164     return Changed;
6165   }
6166
6167   void getAnalysisUsage(AnalysisUsage &AU) const override {
6168     AU.addRequired<AssumptionCacheTracker>();
6169     AU.addRequired<DominatorTreeWrapperPass>();
6171     AU.addPreserved<GlobalsAAWrapperPass>();
6172 }
6173
6174 StringRef getPassName() const override { return "SROA"; }
6175};
6176
6177}
6178
6179char SROALegacyPass::ID = 0;
6180
6185
6187 "Scalar Replacement Of Aggregates", false, false)