LLVM: lib/Transforms/Scalar/SROA.cpp Source File
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This transformation implements the well known scalar replacement of
11/// aggregates transformation. It tries to identify promotable elements of an
12/// aggregate alloca, and promote them to registers. It will also try to
13/// convert uses of an element (or set of elements) of an alloca into a vector
14/// or bitfield-style integer scalar if appropriate.
15///
16/// It works to do this with minimal slicing of the alloca so that regions
17/// which are merely transferred in and out of external memory remain unchanged
18/// and are not decomposed to scalar code.
19///
20/// Because this also performs alloca promotion, it can be thought of as also
21/// serving the purpose of SSA formation. The algorithm iterates on the
22/// function until all opportunities for promotion have been realized.
23///
24//===----------------------------------------------------------------------===//
47#include "llvm/Config/llvm-config.h"
88#include <algorithm>
89#include <cassert>
90#include <cstddef>
91#include <cstdint>
92#include <cstring>
93#include <iterator>
94#include <memory>
95#include <optional>
96#include <queue>
97#include <string>
98#include <utility>
99#include <variant>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "sroa"
104
105STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
106STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
107STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
108STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
109STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
110STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
111STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
112STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114 "Number of loads rewritten into predicated loads to allow promotion");
116 NumStoresPredicated,
117 "Number of stores rewritten into predicated loads to allow promotion");
118STATISTIC(NumDeleted, "Number of instructions deleted");
119STATISTIC(NumVectorized, "Number of vectorized aggregates");
120
121namespace llvm {
122
126}
127
128namespace {
129
130class AllocaSliceRewriter;
131class AllocaSlices;
132class Partition;
133
134class SelectHandSpeculativity {
135 unsigned char Storage = 0;
136 using TrueVal = Bitfield::Element<bool, 0, 1>;  // Low 0'th bit.
137 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
138public:
139 SelectHandSpeculativity() = default;
140 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
141 bool isSpeculatable(bool isTrueVal) const;
142 bool areAllSpeculatable() const;
143 bool areAnySpeculatable() const;
144 bool areNoneSpeculatable() const;
145
146 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
147 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
148};
149static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
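// Illustrative sketch (not part of the pass): the two low bits of Storage
// independently record whether the true and false hands of a select are safe
// to speculatively load through. For example:
//   SelectHandSpeculativity S;
//   S.setAsSpeculatable(/*isTrueVal=*/true); // mark only the true hand
//   S.areAnySpeculatable();                  // now true
//   S.areAllSpeculatable();                  // still false until the false
//                                            // hand is marked as well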
150
151using PossiblySpeculatableLoad =
152 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
153using UnspeculatableStore = StoreInst *;
154using RewriteableMemOp =
155 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
156using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
157
158
159/// An optimization pass providing Scalar Replacement of Aggregates.
160///
161/// This pass takes allocations which can be completely analyzed (that is,
162/// they don't escape) and tries to turn them into scalar SSA values. There
163/// are a few key differences from the traditional SRoA pass:
164///
165/// 1) It takes allocations of aggregates and analyzes the ways in which they
166///    are used to try to split them into smaller allocations, ideally of
167///    a single scalar data type. It will split up memcpy and memset accesses
168///    as necessary and handle overlapping accesses within a partition.
169/// 2) It incrementally promotes the allocations to SSA values as it splits
170///    them, rather than relying on a subsequent mem2reg run.
171/// 3) It tracks debug-info assignment markers so variable locations remain
172///    accurate after slicing.
173///
174/// Both the original allocas and any new allocas formed by splitting are
175/// revisited until no further opportunities for promotion exist.
176class SROA {
177 LLVMContext *const C;
178 DomTreeUpdater *const DTU;
179 AssumptionCache *const AC;
180 const bool PreserveCFG;
181
182
183
184
185
186
187
188
189 SmallSetVector<AllocaInst *, 16> Worklist;
190
191
192
193
195
196
197
198
199
200
201
202
203
204 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
205
206
207 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
208 SmallPtrSet<AllocaInst *, 16>, 16>
209 PromotableAllocas;
210
211
212
213
214
215
216 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
217
218
219
220 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238 static std::optional<RewriteableMemOps>
239 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
240
241public:
242 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
243      SROAOptions PreserveCFG_)
244     : C(C), DTU(DTU), AC(AC),
245 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
246
247
248 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
249
250private:
251 friend class AllocaSliceRewriter;
252
253 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
254 AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
255 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
256 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
257 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
258 void clobberUse(Use &U);
259 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
260 bool promoteAllocas();
261};
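// A minimal usage sketch (assuming the standard new-pass-manager plumbing
// elsewhere in this file): SROAPass::run constructs this helper once per
// function and forwards the resulting flags into PreservedAnalyses, e.g.
//   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
//   auto [Changed, CFGChanged] =
//       SROA(&F.getContext(), &DTU, &AC, SROAOptions::PreserveCFG).runSROA(F);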
262
263}
264
265
266
267
268
269
270
271
272
273namespace {
274enum FragCalcResult { UseFrag, UseNoFrag, Skip };
275}
276static FragCalcResult
277calculateFragment(DILocalVariable *Variable,
278                  uint64_t NewStorageSliceOffsetInBits,
279                  uint64_t NewStorageSliceSizeInBits,
280                  std::optional<DIExpression::FragmentInfo> StorageFragment,
281                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
282                  DIExpression::FragmentInfo &Target) {
283 // If the base storage describes part of the variable, apply that base
284 // fragment's offset and clamp the size to it.
285 if (StorageFragment) {
286 Target.SizeInBits =
287     std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
288 Target.OffsetInBits =
289 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
290 } else {
291 Target.SizeInBits = NewStorageSliceSizeInBits;
292 Target.OffsetInBits = NewStorageSliceOffsetInBits;
293 }
294
295
296
297
298 if (!CurrentFragment) {
299 if (auto Size = Variable->getSizeInBits()) {
300 // Treat the current fragment as covering the whole variable.
301 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
302 if (Target == CurrentFragment)
303 return UseNoFrag;
304 }
305 }
306
307
308
309 if (!CurrentFragment || *CurrentFragment == Target)
310 return UseFrag;
311
312
313
314
315 if (Target.startInBits() < CurrentFragment->startInBits() ||
316 Target.endInBits() > CurrentFragment->endInBits())
317 return Skip;
318
319
320 return UseFrag;
321}
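// Worked example (illustrative): splitting a 16-byte alloca that backs a
// 128-bit variable into two 8-byte slices. For the second slice,
// NewStorageSliceOffsetInBits = 64 and NewStorageSliceSizeInBits = 64, so
// with no pre-existing storage fragment the target becomes
// DIExpression::FragmentInfo{/*SizeInBits=*/64, /*OffsetInBits=*/64} and the
// result UseFrag tells the caller to emit a dbg.assign covering only bits
// [64, 128) of the variable.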
322
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
343                             uint64_t OldAllocaOffsetInBits,
344                             uint64_t SliceSizeInBits, Instruction *OldInst,
345                             Instruction *Inst, Value *Dest, Value *Value,
346                             const DataLayout &DL) {
347 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
348 // Nothing to do if OldInst has no linked dbg.assign markers.
355 if (DVRAssignMarkerRange.empty())
356 return;
357
359 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
360 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
361 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
362 << "\n");
363 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
364 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
369
370
371 SmallDenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
372     BaseFragments;
373 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
374   BaseFragments[getAggregateVariable(DVR)] =
375       DVR->getExpression()->getFragmentInfo();
376
377
378
384
385 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
386 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
387 << "\n");
388 auto *Expr = DbgAssign->getExpression();
389 bool SetKillLocation = false;
390
391 if (IsSplit) {
392 std::optional<DIExpression::FragmentInfo> BaseFragment;
393 {
394 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
395 if (R == BaseFragments.end())
396 return;
397 BaseFragment = R->second;
398 }
399 std::optional<DIExpression::FragmentInfo> CurrentFragment =
400     Expr->getFragmentInfo();
401 DIExpression::FragmentInfo NewFragment;
402 FragCalcResult Result = calculateFragment(
403     DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
404     BaseFragment, CurrentFragment, NewFragment);
405
406 if (Result == Skip)
407 return;
408 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
409 if (CurrentFragment) {
410
411
412
413
414 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
415 }
416
417 if (auto E = DIExpression::createFragmentExpression(
418         Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
419   Expr = *E;
420 } else {
421
422
423
427 SetKillLocation = true;
428 }
429 }
430 }
431
432
433 if (!NewID) {
434 NewID = DIAssignID::getDistinct(Inst->getContext());
435 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
436 }
437 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
438 DbgVariableRecord *NewAssign;
439 if (IsSplit) {
440   NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
441       DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
442                           Dest,
443                           DIExpression::get(Expr->getContext(), {}),
444                           DbgAssign->getDebugLoc())));
445 } else {
446
447 NewAssign = DbgAssign;
448 NewAssign->setAssignId(NewID);
453 }
454
455
456
457
458
459
460
461
462
463
464
465 SetKillLocation |=
466 Value && (DbgAssign->hasArgList() ||
467 !DbgAssign->getExpression()->isSingleLocationExpression());
468 if (SetKillLocation)
469 NewAssign->setKillLocation();
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484 if (NewAssign != DbgAssign) {
485 NewAssign->moveBefore(DbgAssign->getIterator());
486 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
487 }
488 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
489 };
490
491 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
492}
493
494namespace {
495
496/// A custom IRBuilder inserter which prefixes all names, but only in
497/// Assert builds.
498class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
499 std::string Prefix;
500
501 Twine getNameWithPrefix(const Twine &Name) const {
502 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
503 }
504
505public:
506 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
507
508 void InsertHelper(Instruction *I, const Twine &Name,
509                   BasicBlock::iterator InsertPt) const override {
510   IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
511                                          InsertPt);
512 }
513};
514
515/// Provide a type for IRBuilder that drops names in release builds.
516using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
518
519
520
521
522
523
524class Slice {
525
526 uint64_t BeginOffset = 0;
527
528
529 uint64_t EndOffset = 0;
530
531
532
533 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
534
535public:
536 Slice() = default;
537
538 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
539 : BeginOffset(BeginOffset), EndOffset(EndOffset),
540 UseAndIsSplittable(U, IsSplittable) {}
541
542 uint64_t beginOffset() const { return BeginOffset; }
543 uint64_t endOffset() const { return EndOffset; }
544
545 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
546 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
547
548 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
549
550 bool isDead() const { return getUse() == nullptr; }
551 void kill() { UseAndIsSplittable.setPointer(nullptr); }
552
553
554
555
556
557
558
560 if (beginOffset() < RHS.beginOffset())
561 return true;
562 if (beginOffset() > RHS.beginOffset())
563 return false;
564 if (isSplittable() != RHS.isSplittable())
565 return !isSplittable();
566 if (endOffset() > RHS.endOffset())
567 return true;
568 return false;
569 }
570
571
572 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
573 return LHS.beginOffset() < RHSOffset;
574 }
575 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
576 return LHSOffset < RHS.beginOffset();
577 }
578
579 bool operator==(const Slice &RHS) const {
580   return isSplittable() == RHS.isSplittable() &&
581 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
582 }
583 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
584};
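// Illustrative ordering (not from the source): for an 8-byte alloca the
// comparator above sorts by ascending begin offset, then unsplittable before
// splittable at the same offset, then larger end offsets first:
//   [0,8) unsplittable < [0,8) splittable < [0,4) splittable < [4,8) ...
// This yields the strict weak ordering required by std::sort.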
585
586
587
588
589
590
591
592
593class AllocaSlices {
594public:
595
596 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
597
598
599
600
601
602 bool isEscaped() const { return PointerEscapingInstr; }
603 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
604
605
606
607 using iterator = SmallVectorImpl<Slice>::iterator;
608 using range = iterator_range<iterator>;
609
610 iterator begin() { return Slices.begin(); }
611 iterator end() { return Slices.end(); }
612
613 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
614 using const_range = iterator_range<const_iterator>;
615
616 const_iterator begin() const { return Slices.begin(); }
617 const_iterator end() const { return Slices.end(); }
618
619
620
621 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
622
623
624
625
626
627
628 void insert(ArrayRef<Slice> NewSlices) {
629   int OldSize = Slices.size();
630 Slices.append(NewSlices.begin(), NewSlices.end());
631 auto SliceI = Slices.begin() + OldSize;
632 std::stable_sort(SliceI, Slices.end());
633 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
634 }
635
636 /// Support for iterating over the partitions; defined below.
637 class partition_iterator;
638 iterator_range<partition_iterator> partitions();
639
640 /// Access the dead users for this alloca.
641 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
642
643 /// Access the uses that should be dropped if the alloca gets promoted,
644 /// such as droppable assumes and lifetime markers.
645 ArrayRef<Use *> getDeadUsesIfPromotable() const {
646   return DeadUseIfPromotable;
647 }
648
649
650
651
652
653
654
655 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
656
657#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
658 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
659 void printSlice(raw_ostream &OS, const_iterator I,
660 StringRef Indent = " ") const;
661 void printUse(raw_ostream &OS, const_iterator I,
662 StringRef Indent = " ") const;
663 void print(raw_ostream &OS) const;
664 void dump(const_iterator I) const;
665 void dump() const;
666#endif
667
668private:
669 template <typename DerivedT, typename RetT = void> class BuilderBase;
670 class SliceBuilder;
671
672 friend class AllocaSlices::SliceBuilder;
673
674#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
675
676 AllocaInst &AI;
677#endif
678
679
680
681
682
683
684 /// If a pointer to the alloca escapes, the instruction responsible.
685 Instruction *PointerEscapingInstr;
686 Instruction *PointerEscapingInstrReadOnly;
687
688
689
690
691
692
693 /// The slices themselves, sorted by their begin offset.
694 SmallVector<Slice, 8> Slices;
695
696
697
698
699
700
701
702 SmallVector<Instruction *, 8> DeadUsers;
703
704 /// Uses which will become dead if the alloca can be promoted.
705 SmallVector<Use *, 8> DeadUseIfPromotable;
706
707
708
709
710
711
712
713
714 /// Operands which will become dead if we rewrite the alloca.
715 SmallVector<Use *, 8> DeadOperands;
716};
717
718
719
720
721
722
723
724
725
726
727class Partition {
728private:
729 friend class AllocaSlices;
730 friend class AllocaSlices::partition_iterator;
731
732 using iterator = AllocaSlices::iterator;
733
734
735
736 uint64_t BeginOffset = 0, EndOffset = 0;
737
738
739 iterator SI, SJ;
740
741 /// Splittable slices which overlap this partition and continue past its
742 /// end, tracked so each later partition can still see them.
743 SmallVector<Slice *, 4> SplitTails;
744
745
746 Partition(iterator SI) : SI(SI), SJ(SI) {}
747
748public:
749
750
751
752 uint64_t beginOffset() const { return BeginOffset; }
753
754
755
756
757 uint64_t endOffset() const { return EndOffset; }
758
759
760
761
762 uint64_t size() const {
763 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
764 return EndOffset - BeginOffset;
765 }
766
767
768
769 bool empty() const { return SI == SJ; }
770
771
772
773
774
775
776
777
778
779
780 iterator begin() const { return SI; }
781 iterator end() const { return SJ; }
782
783
784
785
786
787
788 /// Get the sequence of split slice tails overlapping this partition.
789 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
790};
791
792}
793
794
795
796
797
798
799
800
801
802
803class AllocaSlices::partition_iterator
804    : public iterator_facade_base<partition_iterator,
805                                  std::forward_iterator_tag, Partition> {
806 friend class AllocaSlices;
807
808
809
810 Partition P;
811
812
813 AllocaSlices::iterator SE;
814
815
816
817 uint64_t MaxSplitSliceEndOffset = 0;
818
819
820
821 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
822 : P(SI), SE(SE) {
823
824
825 if (SI != SE)
826 advance();
827 }
828
829
830
831
832 void advance() {
833 assert((P.SI != SE || !P.SplitTails.empty()) &&
834 "Cannot advance past the end of the slices!");
835
836
837 if (!P.SplitTails.empty()) {
838 if (P.EndOffset >= MaxSplitSliceEndOffset) {
839
840 P.SplitTails.clear();
841 MaxSplitSliceEndOffset = 0;
842 } else {
843
844
845
847 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
849 [&](Slice *S) {
850 return S->endOffset() == MaxSplitSliceEndOffset;
851 }) &&
852 "Could not find the current max split slice offset!");
854 [&](Slice *S) {
855 return S->endOffset() <= MaxSplitSliceEndOffset;
856 }) &&
857 "Max split slice end offset is not actually the max!");
858 }
859 }
860
861
862
863 if (P.SI == SE) {
864 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
865 return;
866 }
867
868
869
870 if (P.SI != P.SJ) {
871
872
873 for (Slice &S : P)
874 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
875 P.SplitTails.push_back(&S);
876 MaxSplitSliceEndOffset =
877 std::max(S.endOffset(), MaxSplitSliceEndOffset);
878 }
879
880
881 P.SI = P.SJ;
882
883
884 if (P.SI == SE) {
885 P.BeginOffset = P.EndOffset;
886 P.EndOffset = MaxSplitSliceEndOffset;
887 return;
888 }
889
890
891
892
893 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
894 !P.SI->isSplittable()) {
895 P.BeginOffset = P.EndOffset;
896 P.EndOffset = P.SI->beginOffset();
897 return;
898 }
899 }
900
901
902
903
904
905
906 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
907 P.EndOffset = P.SI->endOffset();
908 ++P.SJ;
909
910
911
912 if (!P.SI->isSplittable()) {
913
914
915 assert(P.BeginOffset == P.SI->beginOffset());
916
917
918
919 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
920 if (!P.SJ->isSplittable())
921 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
922 ++P.SJ;
923 }
924
925
926
927 return;
928 }
929
930
931
932
933 assert(P.SI->isSplittable() && "Forming a splittable partition!");
934
935
936 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
937 P.SJ->isSplittable()) {
938 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
939 ++P.SJ;
940 }
941
942
943
944
945 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
946 assert(!P.SJ->isSplittable());
947 P.EndOffset = P.SJ->beginOffset();
948 }
949 }
950
951public:
952 bool operator==(const partition_iterator &RHS) const {
953 assert(SE == RHS.SE &&
954 "End iterators don't match between compared partition iterators!");
955
956
957
958
959
960
961 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
962 assert(P.SJ == RHS.P.SJ &&
963 "Same set of slices formed two different sized partitions!");
964 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
965 "Same slice position with differently sized non-empty split "
966 "slice tails!");
967 return true;
968 }
969 return false;
970 }
971
972 partition_iterator &operator++() {
973   advance();
974 return *this;
975 }
976
977 Partition &operator*() { return P; }
978};
979
980
981
982
983
984
985
986
987iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
988 return make_range(partition_iterator(begin(), end()),
989 partition_iterator(end(), end()));
990}
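// Illustrative walk (not from the source): for slices {[0,4) unsplittable,
// [0,8) splittable, [6,10) unsplittable} this iterator yields three
// partitions: [0,4) containing both offset-0 slices, a gap partition [4,6)
// covered only by the split tail of the [0,8) slice, and [6,10), which still
// sees that tail via Partition::splitSliceTails().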
991
992static Value *foldSelectInst(SelectInst &SI) {
993 // If the condition being selected on is a constant or the same value is
994 // being selected between, fold the select. Yes this does (rarely) happen
995 // early on.
996 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
997   return SI.getOperand(1 + CI->isZero());
998 if (SI.getOperand(1) == SI.getOperand(2))
999 return SI.getOperand(1);
1000
1001 return nullptr;
1002}
1003
1004/// A helper that folds a PHI node or a select.
1005static Value *foldPHINodeOrSelectInst(Instruction &I) {
1006 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1007   // If PN merges together the same value, return that value.
1008   return PN->hasConstantValue();
1009 }
1010 return foldSelectInst(cast<SelectInst>(I));
1011}
1012
1013
1014
1015
1016
1019class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1020 friend class PtrUseVisitor<SliceBuilder>;
1021 friend class InstVisitor<SliceBuilder>;
1022
1023 using Base = PtrUseVisitor<SliceBuilder>;
1024
1025 const uint64_t AllocSize;
1026 AllocaSlices &AS;
1027
1028 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1029 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1030
1031 /// Set to de-duplicate dead instructions found in the use walk.
1032 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1033
1034public:
1035 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1036     : PtrUseVisitor<SliceBuilder>(DL),
1037       AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1038       AS(AS) {}
1037
1039private:
1040 void markAsDead(Instruction &I) {
1041   if (VisitedDeadInsts.insert(&I).second)
1042     AS.DeadUsers.push_back(&I);
1043 }
1043
1044 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1045                bool IsSplittable = false) {
1046   // Completely skip uses which have a zero size or start either before or
1047   // past the end of the allocation.
1048   if (Size == 0 || Offset.uge(AllocSize)) {
1049     LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1050                       << Offset
1051                       << " which has zero size or starts outside of the "
1052 << AllocSize << " byte alloca:\n"
1053 << " alloca: " << AS.AI << "\n"
1054 << " use: " << I << "\n");
1055 return markAsDead(I);
1056 }
1057
1058 uint64_t BeginOffset = Offset.getZExtValue();
1059 uint64_t EndOffset = BeginOffset + Size;
1060
1061
1062
1063
1064
1065
1066
1067 assert(AllocSize >= BeginOffset);
1068 if (Size > AllocSize - BeginOffset) {
1069 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1070 << Offset << " to remain within the " << AllocSize
1071 << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 EndOffset = AllocSize;
1075 }
1076
1077 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1078 }
1079
1080 void visitBitCastInst(BitCastInst &BC) {
1081 if (BC.use_empty())
1082   return markAsDead(BC);
1083
1084 return Base::visitBitCastInst(BC);
1085 }
1086
1087 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1088 if (ASC.use_empty())
1089   return markAsDead(ASC);
1090
1091 return Base::visitAddrSpaceCastInst(ASC);
1092 }
1093
1094 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1095 if (GEPI.use_empty())
1096   return markAsDead(GEPI);
1097
1098 return Base::visitGetElementPtrInst(GEPI);
1099 }
1100
1101 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1102 uint64_t Size, bool IsVolatile) {
1103
1104
1105
1106 bool IsSplittable =
1107     Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1108
1109 insertUse(I, Offset, Size, IsSplittable);
1110 }
1111
1112 void visitLoadInst(LoadInst &LI) {
1114 "All simple FCA loads should have been pre-split");
1115
1116
1117
1118 if (!IsOffsetKnown)
1119 return PI.setEscapedReadOnly(&LI);
1120
1121 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1122 if (Size.isScalable()) {
1123   unsigned VScale = LI.getFunction()->getVScaleValue();
1124   if (!VScale)
1125     return PI.setAborted(&LI);
1126
1127   Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1128 }
1129
1130 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1131                          LI.isVolatile());
1132 }
1133
1134 void visitStoreInst(StoreInst &SI) {
1135 Value *ValOp = SI.getValueOperand();
1136 if (ValOp == *U)
1137 return PI.setEscapedAndAborted(&SI);
1138 if (!IsOffsetKnown)
1139 return PI.setAborted(&SI);
1140
1141 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1142 if (StoreSize.isScalable()) {
1143   unsigned VScale = SI.getFunction()->getVScaleValue();
1144   if (!VScale)
1145     return PI.setAborted(&SI);
1146
1147   StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1148 }
1149
1149
1150 uint64_t Size = StoreSize.getFixedValue();
1151
1152
1153
1154
1155
1156
1157
1158
1159 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1160 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1161 << Offset << " which extends past the end of the "
1162 << AllocSize << " byte alloca:\n"
1163 << " alloca: " << AS.AI << "\n"
1164 << " use: " << SI << "\n");
1165 return markAsDead(SI);
1166 }
1167
1168 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1169        "All simple FCA stores should have been pre-split");
1170 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1171 }
1172
1173 void visitMemSetInst(MemSetInst &II) {
1174 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1177 (IsOffsetKnown && Offset.uge(AllocSize)))
1178
1179 return markAsDead(II);
1180
1181 if (!IsOffsetKnown)
1182 return PI.setAborted(&II);
1183
1184 insertUse(II, Offset,
1185           Length ? Length->getLimitedValue()
1186                  : AllocSize - Offset.getLimitedValue(),
1187           (bool)Length);
1188 }
1189
1190 void visitMemTransferInst(MemTransferInst &II) {
1191 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1192 if (Length && Length->getValue() == 0)
1193   // Zero-length mem transfer intrinsics can be ignored entirely.
1194 return markAsDead(II);
1195
1196
1197
1198 if (VisitedDeadInsts.count(&II))
1199 return;
1200
1201 if (!IsOffsetKnown)
1202 return PI.setAborted(&II);
1203
1204
1205
1206
1207
1208
1209 if (Offset.uge(AllocSize)) {
1210 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1211 MemTransferSliceMap.find(&II);
1212 if (MTPI != MemTransferSliceMap.end())
1213 AS.Slices[MTPI->second].kill();
1214 return markAsDead(II);
1215 }
1216
1217 uint64_t RawOffset = Offset.getLimitedValue();
1218 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1219
1220
1221
1222 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1223
1224 if (!II.isVolatile())
1225 return markAsDead(II);
1226
1227 return insertUse(II, Offset, Size, false);
1228 }
1229
1230
1231
1232 bool Inserted;
1233 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1234 std::tie(MTPI, Inserted) =
1235 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1236 unsigned PrevIdx = MTPI->second;
1237 if (!Inserted) {
1238 Slice &PrevP = AS.Slices[PrevIdx];
1239
1240
1241
1242 if (.isVolatile() && PrevP.beginOffset() == RawOffset) {
1243 PrevP.kill();
1244 return markAsDead(II);
1245 }
1246
1247
1248
1249 PrevP.makeUnsplittable();
1250 }
1251
1252
1253 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1254
1255
1256 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1257 "Map index doesn't point back to a slice with this user.");
1258 }
1259
1260
1261
1262
1263 void visitIntrinsicInst(IntrinsicInst &II) {
1264 if (II.isDroppable()) {
1265 AS.DeadUseIfPromotable.push_back(U);
1266 return;
1267 }
1268
1269 if (!IsOffsetKnown)
1270 return PI.setAborted(&II);
1271
1272 if (II.isLifetimeStartOrEnd()) {
1273 insertUse(II, Offset, AllocSize, true);
1274 return;
1275 }
1276
1277 Base::visitIntrinsicInst(II);
1278 }
1279
1280 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1281
1282
1283
1284
1285 SmallPtrSet<Instruction *, 4> Visited;
1286 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1287 Visited.insert(Root);
1288 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1289 // If there are no loads or stores, the access is dead. We mark that as
1290 // a size zero access.
1291 Size = 0;
1292
1293 do {
1294   Instruction *I, *UsedI;
1295   std::tie(UsedI, I) = Uses.pop_back_val();
1296
1297   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1298     TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1299     if (LoadSize.isScalable()) {
1300       PI.setAborted(LI);
1301       return nullptr;
1302     }
1303     Size = std::max(Size, LoadSize.getFixedValue());
1304     continue;
1305   }
1306   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1307     Value *Op = SI->getOperand(0);
1308     if (Op == UsedI)
1309       return SI;
1310     TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1311     if (StoreSize.isScalable()) {
1312       PI.setAborted(SI);
1313       return nullptr;
1314     }
1315     Size = std::max(Size, StoreSize.getFixedValue());
1316     continue;
1317   }
1318
1319   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1320     if (!GEP->hasAllZeroIndices())
1321       return GEP;
1322   } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1323              !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1324     return I;
1325   }
1326
1327   for (User *U : I->users())
1328     if (Visited.insert(cast<Instruction>(U)).second)
1329       Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1330 } while (!Uses.empty());
1331
1332 return nullptr;
1333 }
1334
1335 void visitPHINodeOrSelectInst(Instruction &I) {
1336 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1337 if (I.use_empty())
1338 return markAsDead(I);
1339
1340
1341
1342
1343 if (isa<PHINode>(I) &&
1344     I.getParent()->getFirstInsertionPt() == I.getParent()->end())
1345 return PI.setAborted(&I);
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1356   if (Result == *U)
1357
1358
1359 enqueueUsers(I);
1360 else
1361
1362
1363 AS.DeadOperands.push_back(U);
1364
1365 return;
1366 }
1367
1368 if (!IsOffsetKnown)
1369 return PI.setAborted(&I);
1370
1371
1372 uint64_t &Size = PHIOrSelectSizes[&I];
1373 if (!Size) {
1374
1375 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1376 return PI.setAborted(UnsafeI);
1377 }
1378
1379
1380
1381
1382
1383
1384
1385 if (Offset.uge(AllocSize)) {
1386 AS.DeadOperands.push_back(U);
1387 return;
1388 }
1389
1391 }
1392
1393 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1394
1395 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1396
1397
1398 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1399
1400 void visitCallBase(CallBase &CB) {
1401
1402 // If the operand is a read-only, non-captured pointer argument, the
1403 // alloca only escapes in a read-only way.
1404 if (CB.isDataOperand(U) && CB.doesNotCapture(U->getOperandNo()) &&
1405     CB.onlyReadsMemory(U->getOperandNo())) {
1406   PI.setEscapedReadOnly(&CB);
1407 return;
1408 }
1409
1410 Base::visitCallBase(CB);
1411 }
1412};
1413
1414AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1415 :
1416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 AI(AI),
1418#endif
1419 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1420 SliceBuilder PB(DL, AI, *this);
1421 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1422 if (PtrI.isEscaped() || PtrI.isAborted()) {
1423
1424
1425 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1426 : PtrI.getAbortingInst();
1427 assert(PointerEscapingInstr && "Did not track a bad instruction");
1428 return;
1429 }
1430 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1431
1432 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1433
1434 // Sort the uses. This arranges for the offsets to be in ascending order,
1435 // and the sizes to be in descending order.
1436 llvm::stable_sort(Slices);
1437}
1438
1439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1440
1441void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1442 StringRef Indent) const {
1443 printSlice(OS, I, Indent);
1444 OS << "\n";
1445 printUse(OS, I, Indent);
1446}
1447
1448void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1449 StringRef Indent) const {
1450 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1451 << " slice #" << (I - begin())
1452 << (I->isSplittable() ? " (splittable)" : "");
1453}
1454
1455void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1456 StringRef Indent) const {
1457 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1458}
1459
1460void AllocaSlices::print(raw_ostream &OS) const {
1461 if (PointerEscapingInstr) {
1462 OS << "Can't analyze slices for alloca: " << AI << "\n"
1463 << " A pointer to this alloca escaped by:\n"
1464 << " " << *PointerEscapingInstr << "\n";
1465 return;
1466 }
1467
1468 if (PointerEscapingInstrReadOnly)
1469 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1470
1471 OS << "Slices of alloca: " << AI << "\n";
1472 for (const_iterator I = begin(), E = end(); I != E; ++I)
1473   print(OS, I);
1474}
1475
1476LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1477 print(dbgs(), I);
1478}
1479LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1480
1481#endif
1482
1483
1484
1485static std::pair<Type *, IntegerType *>
1486findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1487               uint64_t EndOffset) {
1488 Type *Ty = nullptr;
1489 bool TyIsCommon = true;
1490 IntegerType *ITy = nullptr;
1491
1492
1493
1494 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1495 Use *U = I->getUse();
1496 if (isa<IntrinsicInst>(*U->getUser()))
1497   continue;
1498 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1499 continue;
1500
1501 Type *UserTy = nullptr;
1502 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1503   UserTy = LI->getType();
1504 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1505   UserTy = SI->getValueOperand()->getType();
1506 }
1507
1508 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1509   // If the type is larger than the partition, skip it. We only encounter
1510   // this for split integer operations where we want to use the type of
1511   // the entity causing the split. Also skip if the type is not a byte
1512   // width multiple.
1513 if (UserITy->getBitWidth() % 8 != 0 ||
1514 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1515 continue;
1516
1517
1518
1519 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1520 ITy = UserITy;
1521 }
1522
1523
1524
1525 if (!UserTy || (Ty && Ty != UserTy))
1526 TyIsCommon = false;
1527 else
1528 Ty = UserTy;
1529 }
1530
1531 return {TyIsCommon ? Ty : nullptr, ITy};
1532}
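// For example (illustrative): if a partition's slices are an i32 load, an
// i32 store, and a float load, each exactly covering bytes [0,4), no single
// type is common to all users, so the first element of the returned pair is
// null, but i32 is still returned as the best-fit integer type.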
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1554
1555
1556
1557
1558
1562 Type *LoadType = nullptr;
1563 for (User *U : PN.users()) {
1564   LoadInst *LI = dyn_cast<LoadInst>(U);
1565   if (!LI || !LI->isSimple())
1566     return false;
1567
1568
1569
1570
1571 if (LI->getParent() != PN.getParent())
1572   return false;
1573
1574 if (LoadType) {
1575 if (LoadType != LI->getType())
1576 return false;
1577 } else {
1578 LoadType = LI->getType();
1579 }
1580
1581
1582
1583 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1584   if (BBI->mayWriteToMemory())
1585 return false;
1586
1587 MaxAlign = std::max(MaxAlign, LI->getAlign());
1588 }
1589
1590 if (!LoadType)
1591 return false;
1592
1593 APInt LoadSize =
1594 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1595
1596
1597
1598
1599 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1600 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1601 Value *InVal = PN.getIncomingValue(Idx);
1602
1603 // If the value is produced by the terminator of the predecessor (an
1604 // invoke) or it has side-effects, there is no valid place to put a load
1605 // in the predecessor.
1606 if (TI == InVal || TI->mayHaveSideEffects())
1607   return false;
1608
1609
1610
1611 if (TI->getNumSuccessors() == 1)
1612   continue;
1613
1614
1615
1616
1617 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1618   continue;
1619
1620 return false;
1621 }
1622
1623 return true;
1624}
1625
1626static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1627 LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");
1628
1629 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1630 Type *LoadTy = SomeLoad->getType();
1631 IRB.SetInsertPoint(&PN);
1632 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1633                                PN.getName() + ".sroa.speculated");
1634
1635
1636
1637 AAMDNodes AATags = SomeLoad->getAAMetadata();
1638 Align Alignment = SomeLoad->getAlign();
1639
1640 // Rewrite all loads of the PN to use the new PHI.
1641 while (!PN.use_empty()) {
1642   LoadInst *LI = cast<LoadInst>(PN.user_back());
1643   LI->replaceAllUsesWith(NewPN);
1644   LI->eraseFromParent();
1645 }
1646
1647
1648 DenseMap<BasicBlock *, Value *> InjectedLoads;
1649 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1650   BasicBlock *Pred = PN.getIncomingBlock(Idx);
1651   Value *InVal = PN.getIncomingValue(Idx);
1653
1654
1655
1656
1657 if (Value *V = InjectedLoads.lookup(Pred)) {
1658   NewPN->addIncoming(V, Pred);
1659   continue;
1660 }
1661
1662 Instruction *TI = Pred->getTerminator();
1663 IRB.SetInsertPoint(TI);
1664
1665 LoadInst *Load = IRB.CreateAlignedLoad(
1666 LoadTy, InVal, Alignment,
1667 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1668 ++NumLoadsSpeculated;
1669 if (AATags)
1670 Load->setAAMetadata(AATags);
1671 NewPN->addIncoming(Load, Pred);
1672 InjectedLoads[Pred] = Load;
1673 }
1674
1675 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1676 PN.eraseFromParent();
1677}
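// Illustrative IR sketch of the rewrite performed above: a load of a PHI of
// pointers becomes a PHI of loads injected into each predecessor:
//
//   %p = phi ptr [ %a, %then ], [ %b, %else ]
//   %v = load i32, ptr %p
// becomes
//   then:  %v.then = load i32, ptr %a
//   else:  %v.else = load i32, ptr %b
//   %v = phi i32 [ %v.then, %then ], [ %v.else, %else ]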
1678
1679SelectHandSpeculativity &
1680SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1681 if (isTrueVal)
1682   Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1683 else
1684   Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1685 return *this;
1686}
1687
1688bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1689 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1690                  : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1691}
1692
1693bool SelectHandSpeculativity::areAllSpeculatable() const {
1694 return isSpeculatable(true) &&
1695 isSpeculatable(false);
1696}
1697
1698bool SelectHandSpeculativity::areAnySpeculatable() const {
1699 return isSpeculatable(true) ||
1700 isSpeculatable(false);
1701}
1702bool SelectHandSpeculativity::areNoneSpeculatable() const {
1703 return !areAnySpeculatable();
1704}
1705
1706static SelectHandSpeculativity
1707isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1708 assert(LI.isSimple() && "Only for simple loads");
1709 SelectHandSpeculativity Spec;
1710
1711 const DataLayout &DL = SI.getDataLayout();
1712 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1713   if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1714                                   &LI))
1715     Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1716   else if (PreserveCFG)
1717     return Spec;
1718
1719 return Spec;
1720}
1721
1722std::optional
1723SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1724 RewriteableMemOps Ops;
1725
1726 for (User *U : SI.users()) {
1727 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1728   U = *BC->user_begin();
1729
1730 if (auto *Store = dyn_cast<StoreInst>(U)) {
1731   // Note that atomic stores can be transformed; atomic semantics do not
1732   // have any meaning for a local alloca. Stores are not speculatable,
1733   // so if we can't turn them into predicated stores, we are done.
1734   if (Store->isVolatile() || PreserveCFG)
1735     return {};
1736 Ops.emplace_back(Store);
1737 continue;
1738 }
1739
1740 auto *LI = dyn_cast<LoadInst>(U);
1741
1742 // Note that atomic loads can be transformed; atomic semantics do not
1743 // have any meaning for a local alloca.
1744 if (!LI || LI->isVolatile())
1745   return {};
1746
1747 PossiblySpeculatableLoad Load(LI);
1748 if (!LI->isSimple()) {
1749   // A non-simple load cannot be speculated; it can only become a
1750   // predicated load, which requires changing the CFG.
1751   if (PreserveCFG)
1752     return {};
1753 Ops.emplace_back(Load);
1754 continue;
1755 }
1756
1757 SelectHandSpeculativity Spec =
1758     isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1759 if (PreserveCFG && !Spec.areAllSpeculatable())
1760 return {};
1761
1762 Load.setInt(Spec);
1763 Ops.emplace_back(Load);
1764 }
1765
1766 return Ops;
1767}
1768
1769static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1770                                     IRBuilderTy &IRB) {
1771 LLVM_DEBUG(dbgs() << "    original load: " << SI << "\n");
1772
1773 Value *TV = SI.getTrueValue();
1774 Value *FV = SI.getFalseValue();
1775
1776
1777 assert(LI.isSimple() && "We only speculate simple loads");
1778
1779 IRB.SetInsertPoint(&LI);
1780
1781 LoadInst *TL =
1782     IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1783                           LI.getName() + ".sroa.speculate.load.true");
1784 LoadInst *FL =
1785     IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1786                           LI.getName() + ".sroa.speculate.load.false");
1787 NumLoadsSpeculated += 2;
1788
1789
1792
1793 AAMDNodes Tags = LI.getAAMetadata();
1794 if (Tags) {
1795   TL->setAAMetadata(Tags);
1796   FL->setAAMetadata(Tags);
1797 }
1798
1799 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1800 LI.getName() + ".sroa.speculated",
1802                             &SI);
1803 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1804 LI.replaceAllUsesWith(V);
1805}
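// Illustrative IR sketch: a load of a select of pointers becomes a select of
// two speculated loads:
//
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
// becomes
//   %v.true  = load i32, ptr %a
//   %v.false = load i32, ptr %b
//   %v = select i1 %c, i32 %v.true, i32 %v.false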
1806
1807template <typename T>
1808static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1809                                 SelectHandSpeculativity Spec,
1810                                 DomTreeUpdater &DTU) {
1811 static_assert(std::is_same_v<T, LoadInst> || std::is_same_v<T, StoreInst>);
1812 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1813 BasicBlock *Head = SI.getParent();
1814 Instruction *ThenTerm = nullptr;
1815 Instruction *ElseTerm = nullptr;
1816 if (Spec.areNoneSpeculatable())
1817   SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1818                                 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1819 else {
1820   SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1821                             SI.getMetadata(LLVMContext::MD_prof), &DTU,
1822                             /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1823   if (Spec.isSpeculatable(/*isTrueVal=*/true))
1824     cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1825 }
1826 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1827 Spec = {}; // Do not use `Spec` beyond this point.
1828 BasicBlock *Tail = I.getParent();
1829 Tail->setName(Head->getName() + ".cont");
1830 PHINode *PN;
1831 if (isa<LoadInst>(I))
1832   PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1833 for (BasicBlock *SuccBB : successors(Head)) {
1834 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1835 int SuccIdx = IsThen ? 0 : 1;
1836 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1837 auto &CondMemOp = cast(*I.clone());
1838 if (NewMemOpBB != Head) {
1839 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1840   if (isa<LoadInst>(I))
1841     ++NumLoadsPredicated;
1842 else
1843 ++NumStoresPredicated;
1844 } else {
1845 CondMemOp.dropUBImplyingAttrsAndMetadata();
1846 ++NumLoadsSpeculated;
1847 }
1848 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1849 Value *Ptr = SI.getOperand(1 + SuccIdx);
1850 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1851 if (isa<LoadInst>(I)) {
1852   CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1853 PN->addIncoming(&CondMemOp, NewMemOpBB);
1854 } else
1855   LLVM_DEBUG(dbgs() << "                 to: " << CondMemOp << "\n");
1856 }
1857 if (isa<LoadInst>(I)) {
1858   PN->takeName(&I);
1859   LLVM_DEBUG(dbgs() << "          to: " << *PN << "\n");
1860   I.replaceAllUsesWith(PN);
1861 }
1862}
1863
1864static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1865                                 SelectHandSpeculativity Spec,
1866                                 DomTreeUpdater &DTU) {
1867 if (auto *LI = dyn_cast<LoadInst>(&I))
1868   rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1869 else if (auto *SI = dyn_cast<StoreInst>(&I))
1870   rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1871 else
1872   llvm_unreachable("Only for load and store.");
1873}
1874
1876 const RewriteableMemOps &Ops,
1878 bool CFGChanged = false;
1880
1881 for (const RewriteableMemOp &Op : Ops) {
1882 SelectHandSpeculativity Spec;
1884 if (auto *const *US = std::get_if(&Op)) {
1885 I = *US;
1886 } else {
1887 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1888 I = PSL.getPointer();
1889 Spec = PSL.getInt();
1890 }
1891 if (Spec.areAllSpeculatable()) {
1892   speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1893 } else {
1894 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1895   rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1896   CFGChanged = true;
1897 }
1898 I->eraseFromParent();
1899 }
1900
1902 SI.replaceAllUsesWith(PoisonValue::get(SI.getType()));
1903 SI.eraseFromParent();
1904 return CFGChanged;
1905}
1906
1907
1908
1911 const Twine &NamePrefix) {
1913 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1914 NamePrefix + "sroa_idx");
1915 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1916 NamePrefix + "sroa_cast");
1917}
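// For example (illustrative), with Offset == 8 this emits
//   %x.sroa_idx = getelementptr inbounds i8, ptr %base, i64 8
// followed, only when the source and destination address spaces differ, by
// an addrspacecast named "<prefix>sroa_cast".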
1918
1919
1923
1924
1925
1926
1927
1928
1929
1930static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1931                            unsigned VScale = 0) {
1932 if (OldTy == NewTy)
1933 return true;
1934
1935
1936
1937
1941 "We can't have the same bitwidth for different int types");
1942 return false;
1943 }
1944
1945 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1946 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1947
1950
1951 if (!VScale)
1952 return false;
1953
1954
1955
1956
1957 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1958 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1959
1962 return false;
1963
1965 } else {
1967 return false;
1968
1970 }
1971 }
1972
1973 if (NewSize != OldSize)
1974 return false;
1976 return false;
1977
1978
1979
1986
1987
1988
1989 return OldAS == NewAS ||
1990 (.isNonIntegralAddressSpace(OldAS) &&
1991 .isNonIntegralAddressSpace(NewAS) &&
1992 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
1993 }
1994
1995 // We can convert integers to integral pointers, but not to non-integral
1996 // pointers.
1997 if (OldTy->isIntegerTy())
1998   return !DL.isNonIntegralPointerType(NewTy);
1999
2000 // We can convert integral pointers to integers, but non-integral pointers
2001 // need to remain pointers.
2002 if (!DL.isNonIntegralPointerType(OldTy))
2003   return NewTy->isIntegerTy();
2004
2005 return false;
2006 }
2007
2008 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2009   return false;
2010
2011 return true;
2012}
2013
2014
2015
2016
2017
2018
2019
2020static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2021                           Type *NewTy) {
2022 Type *OldTy = V->getType();
2023
2024#ifndef NDEBUG
2025 BasicBlock *BB = IRB.GetInsertBlock();
2029 "Value not convertable to type");
2030#endif
2031
2032 if (OldTy == NewTy)
2033 return V;
2034
2036 "Integer types must be the exact same to convert.");
2037
2038
2039
2040 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2041 Type *InTy = In->getType();
2042 if (InTy == Ty)
2043 return In;
2044
2046
2047
2049 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2051 IRB.getInt64(0)),
2052 Ty);
2053 }
2054
2056
2057
2059 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2060 IRB.getInt64(0));
2061 }
2062
2063 return IRB.CreateBitCast(In, Ty);
2064 };
2065
2066
2067 // See if we need inttoptr for this type pair. May require an additional
2068 // bitcast before the inttoptr.
2069 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2070   // Expand <2 x i32> to ptr --> <2 x i32> to i64 to ptr, and similarly
2071   // for the vector-of-pointer forms.
2072 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2073 NewTy);
2074 }
2075
2076
2077 // See if we need ptrtoint for this type pair. May require an additional
2078 // bitcast after the ptrtoint.
2079 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2080   // Expand ptr to <2 x i32> --> ptr to i64 to <2 x i32>, and similarly
2081   // for the vector-of-pointer forms.
2082 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2083 NewTy);
2084 }
2085
2086 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2087   unsigned OldAS = OldTy->getPointerAddressSpace();
2088   unsigned NewAS = NewTy->getPointerAddressSpace();
2090
2091
2092
2093
2094
2095 if (OldAS != NewAS) {
2096 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2097 return IRB.CreateIntToPtr(
2098 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2099 DL.getIntPtrType(NewTy)),
2100 NewTy);
2101 }
2102 }
2103
2104 return CreateBitCastLike(V, NewTy);
2105}
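// Illustrative sketch: converting an i64 to a pointer goes through the
// pointer-sized integer type, e.g.
//   %p = inttoptr i64 %v to ptr
// while a same-sized int <-> FP conversion is a plain bitcast, and mismatched
// address spaces take the ptrtoint + inttoptr path guarded above.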
2106
2107
2108
2109
2110
2111static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2112                                            VectorType *Ty,
2113                                            uint64_t ElementSize,
2114                                            const DataLayout &DL,
2115                                            unsigned VScale) {
2116 // First validate the slice offsets.
2117 uint64_t BeginOffset =
2118     std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2119 uint64_t BeginIndex = BeginOffset / ElementSize;
2120 if (BeginIndex * ElementSize != BeginOffset ||
2121     BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2122   return false;
2123 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2124 uint64_t EndIndex = EndOffset / ElementSize;
2125 if (EndIndex * ElementSize != EndOffset ||
2126     EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2127   return false;
2128
2129 assert(EndIndex > BeginIndex && "Empty vector!");
2130 uint64_t NumElements = EndIndex - BeginIndex;
2131 Type *SliceTy = (NumElements == 1)
2132 ? Ty->getElementType()
2133                    : FixedVectorType::get(Ty->getElementType(), NumElements);
2134
2135 Type *SplitIntTy =
2136 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2137
2138 Use *U = S.getUse();
2139
2140 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2141   if (MI->isVolatile())
2142 return false;
2143 if (!S.isSplittable())
2144 return false;
2146 if (->isLifetimeStartOrEnd() &&
->isDroppable())
2147 return false;
2150 return false;
2152
2153 if (LTy->isStructTy())
2154 return false;
2155 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2156 assert(LTy->isIntegerTy());
2157 LTy = SplitIntTy;
2158 }
2159   if (!canConvertValue(DL, SliceTy, LTy, VScale))
2160     return false;
2161 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2162   if (SI->isVolatile())
2163 return false;
2164 Type *STy = SI->getValueOperand()->getType();
2165   // Disable vector promotion when there are loads or stores of an FCA.
2166   if (STy->isStructTy())
2167     return false;
2168 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2169     assert(STy->isIntegerTy());
2170     STy = SplitIntTy;
2171 }
2172   if (!canConvertValue(DL, STy, SliceTy, VScale))
2173     return false;
2174 } else {
2175 return false;
2176 }
2177
2178 return true;
2179}
2180
2181
2182
2183
2184
2188 bool HaveCommonEltTy, Type *CommonEltTy,
2189 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2190 VectorType *CommonVecPtrTy, unsigned VScale) {
2191
2192 if (CandidateTys.empty())
2193 return nullptr;
2194
2195
2196
2197
2198
2199 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2200 return nullptr;
2201
2202
2203 if (!HaveCommonEltTy && HaveVecPtrTy) {
2204
2205 CandidateTys.clear();
2206 CandidateTys.push_back(CommonVecPtrTy);
2207 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2208
2209 for (VectorType *&VTy : CandidateTys) {
2210 if (!VTy->getElementType()->isIntegerTy())
2211     VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2212         VTy->getContext(), VTy->getScalarSizeInBits())));
2213 }
2214
2215
2216
2217 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2218   (void)DL;
2219 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2220 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2221 "Cannot have vector types of different sizes!");
2222 assert(RHSTy->getElementType()->isIntegerTy() &&
2223 "All non-integer types eliminated!");
2224 assert(LHSTy->getElementType()->isIntegerTy() &&
2225 "All non-integer types eliminated!");
2226   return cast<FixedVectorType>(RHSTy)->getNumElements() <
2227          cast<FixedVectorType>(LHSTy)->getNumElements();
2228 };
2229 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2230   (void)DL;
2231 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2232 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2233 "Cannot have vector types of different sizes!");
2234 assert(RHSTy->getElementType()->isIntegerTy() &&
2235 "All non-integer types eliminated!");
2236 assert(LHSTy->getElementType()->isIntegerTy() &&
2237 "All non-integer types eliminated!");
2238   return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2239          cast<FixedVectorType>(LHSTy)->getNumElements();
2240 };
2241 llvm::sort(CandidateTys, RankVectorTypesComp);
2242 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2243 CandidateTys.end());
2244 } else {
2245
2246
2247#ifndef NDEBUG
2248 for (VectorType *VTy : CandidateTys) {
2249 assert(VTy->getElementType() == CommonEltTy &&
2250 "Unaccounted for element type!");
2251 assert(VTy == CandidateTys[0] &&
2252 "Different vector types with the same element type!");
2253 }
2254#endif
2255 CandidateTys.resize(1);
2256 }
2257
2258
2259
2260 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2261   return cast<FixedVectorType>(VTy)->getNumElements() >
2262          std::numeric_limits<unsigned short>::max();
2263 });
2264
2265
2266 auto VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) {
2267   uint64_t ElementSize =
2268       DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2269
2270
2271
2272 if (ElementSize % 8)
2273 return false;
2274 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2275 "vector size not a multiple of element size?");
2276 ElementSize /= 8;
2277
2278 for (const Slice &S : P)
2279   if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2280     return false;
2281
2282 for (const Slice *S : P.splitSliceTails())
2283   if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2284     return false;
2285
2286 return true;
2287 });
2288 return VTy != CandidateTys.end() ? *VTy : nullptr;
2289}
2290
2295 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2296 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2297 [[maybe_unused]] VectorType *OriginalElt =
2298 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2299
2300
2301 for (Type *Ty : OtherTys) {
2302 if (!VectorType::isValidElementType(Ty))
2303   continue;
2304 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2305
2306
2307 for (VectorType *const VTy : CandidateTysCopy) {
2308
2309 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2310 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2311 unsigned ElementSize =
2312 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2313 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2314     VectorSize % TypeSize == 0) {
2315   VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2316   CheckCandidateType(NewVTy);
2317 }
2318 }
2319 }
2320
2321 return checkVectorTypesForPromotion(
2322     P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2323 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2324}
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2336                                           unsigned VScale) {
2337
2338
2340 // Collect the candidate types for vector-based promotion. Also track
2341 // whether we have different element types.
2342 SmallVector<VectorType *, 4> CandidateTys;
2343 Type *CommonEltTy = nullptr;
2343 VectorType *CommonVecPtrTy = nullptr;
2344 bool HaveVecPtrTy = false;
2345 bool HaveCommonEltTy = true;
2346 bool HaveCommonVecPtrTy = true;
2347 auto CheckCandidateType = [&](Type *Ty) {
2348   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2349     // All candidate vector types must have the same total bit width.
2350 if (!CandidateTys.empty()) {
2351       VectorType *V = CandidateTys[0];
2352       if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2353 DL.getTypeSizeInBits(V).getFixedValue()) {
2354 CandidateTys.clear();
2355 return;
2356 }
2357 }
2358     CandidateTys.push_back(VTy);
2359     Type *EltTy = VTy->getElementType();
2360
2361 if (!CommonEltTy)
2362 CommonEltTy = EltTy;
2363 else if (CommonEltTy != EltTy)
2364 HaveCommonEltTy = false;
2365
2366     if (EltTy->isPointerTy()) {
2367       HaveVecPtrTy = true;
2368 if (!CommonVecPtrTy)
2369 CommonVecPtrTy = VTy;
2370 else if (CommonVecPtrTy != VTy)
2371 HaveCommonVecPtrTy = false;
2372 }
2373 }
2374 };
2375 SetVector<Type *> LoadStoreTys;
2376 SetVector<Type *> DeferredTys;
2377 for (const Slice &S : P) {
2378   Type *Ty;
2379   if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2380     Ty = LI->getType();
2381   else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2382     Ty = SI->getValueOperand()->getType();
2383 else
2384 continue;
2385
2386 auto CandTy = Ty->getScalarType();
2387 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2388 S.endOffset() != P.endOffset())) {
2389 DeferredTys.insert(Ty);
2390 continue;
2391 }
2392
2393 LoadStoreTys.insert(Ty);
2394
2395 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2396 CheckCandidateType(Ty);
2397 }
2398
2399 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2400 if (VectorType *VTy = createAndCheckVectorTypesForPromotion(
2401         LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2402 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2403 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2404 return VTy;
2405
2406 CandidateTys.clear();
2407 return createAndCheckVectorTypesForPromotion(
2408     DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2409 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2410 CommonVecPtrTy, VScale);
2411}
2412
2413
2414
2415
2416
2417static bool isIntegerWideningViableForSlice(const Slice &S,
2418                                            uint64_t AllocBeginOffset,
2419                                            Type *AllocaTy,
2420                                            const DataLayout &DL,
2421                                            bool &WholeAllocaOp) {
2422 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2423
2424 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2425 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2426
2427 Use *U = S.getUse();
2428
2429
2430
2431
2432
2433 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser()))
2434   if (II->isLifetimeStartOrEnd() || II->isDroppable())
2435 return true;
2436 }
2437
2438
2439
2440 if (RelEnd > Size)
2441 return false;
2442
2443 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2444   if (LI->isVolatile())
2445     return false;
2446
2447   // We can't handle loads that extend past the allocated memory.
2448   if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
2449     return false;
2450
2451
2452 if (S.beginOffset() < AllocBeginOffset)
2453 return false;
2454
2455
2456
2457   if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2458     WholeAllocaOp = true;
2459   if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2460     if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2461 return false;
2462 } else if (RelBegin != 0 || RelEnd != Size ||
2463              !canConvertValue(DL, AllocaTy, LI->getType())) {
2464     // Non-integer loads need to be convertible from the alloca type so
2465     // that they are promotable.
2466 return false;
2467 }
2469 Type *ValueTy = SI->getValueOperand()->getType();
2470 if (SI->isVolatile())
2471 return false;
2472
2473 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2474   if (StoreSize.isScalable() || StoreSize.getFixedValue() > Size)
2475     return false;
2476
2477
2478 if (S.beginOffset() < AllocBeginOffset)
2479 return false;
2480
2481
2482
2483   if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2484     WholeAllocaOp = true;
2485   if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2486     if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2487 return false;
2488 } else if (RelBegin != 0 || RelEnd != Size ||
2489              !canConvertValue(DL, ValueTy, AllocaTy)) {
2490     // Non-integer stores need to be convertible to the alloca type so
2491     // that they are promotable.
2492 return false;
2493 }
2494 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2495   if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2496     return false;
2497 if (!S.isSplittable())
2498 return false;
2499 } else {
2500 return false;
2501 }
2502
2503 return true;
2504}
2505
2506
2507
2508
2509
2510
2511
2512static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2513                                    const DataLayout &DL) {
2514 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2515
2515 // Don't create integer types larger than the maximum bitwidth.
2516 if (SizeInBits > IntegerType::MAX_INT_BITS)
2517   return false;
2518
2519
2520 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2521 return false;
2522
2523 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2524
2525 // We need to be able to convert between the alloca type and an integer
2526 // type of the same size in both directions.
2527 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2528     !canConvertValue(DL, IntTy, AllocaTy))
2529   return false;
2530
2531
2532
2533
2534
2535
2536
2537
2538 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2539
2540 for (const Slice &S : P)
2541   if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2542                                        WholeAllocaOp))
2543 return false;
2544
2545 for (const Slice *S : P.splitSliceTails())
2546   if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2547                                        WholeAllocaOp))
2548 return false;
2549
2550 return WholeAllocaOp;
2551}
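// For example (illustrative): an i64 alloca accessed by one full-width i64
// load and an i16 store at byte offset 2 is widenable (the narrow store is
// rewritten with the shift-and-mask insertion below), whereas a 4-byte float
// load of the same alloca defeats widening because only integer accesses may
// cover a strict subrange of the alloca.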
2552
2555 const Twine &Name) {
2558 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2559 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2560 "Element extends past full value");
2561 uint64_t ShAmt = 8 * Offset;
2562 if (DL.isBigEndian())
2563 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2564 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2565 if (ShAmt) {
2566 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2567   LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
2568 }
2569 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2570 "Cannot extract to a larger integer!");
2571 if (Ty != IntTy) {
2572 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2573   LLVM_DEBUG(dbgs() << "     trunced: " << *V << "\n");
2574 }
2575 return V;
2576}
2577
2582 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2583 "Cannot insert a larger integer!");
2585 if (Ty != IntTy) {
2586 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2587   LLVM_DEBUG(dbgs() << "    extended: " << *V << "\n");
2588 }
2589 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2590 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2591 "Element store outside of alloca store");
2592 uint64_t ShAmt = 8 * Offset;
2593 if (DL.isBigEndian())
2594 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2595 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2596 if (ShAmt) {
2597 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2598   LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
2599 }
2600
2601 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2602 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2603 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2605 V = IRB.CreateOr(Old, V, Name + ".insert");
2607 }
2608 return V;
2609}
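// Worked example (illustrative): inserting an i16 at byte offset 2 of an i64
// on a little-endian target uses ShAmt = 8 * 2 = 16, so the sequence is
//   V   = (zext i16 %v to i64) << 16
//   Old = Old & ~(0xFFFF << 16)   ; clear bits [16, 32)
//   New = Old | V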
2610
2612 unsigned EndIndex, const Twine &Name) {
2614 unsigned NumElements = EndIndex - BeginIndex;
2616
2617 if (NumElements == VecTy->getNumElements())
2618 return V;
2619
2620 if (NumElements == 1) {
2621 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2622 Name + ".extract");
2623   LLVM_DEBUG(dbgs() << "     extract: " << *V << "\n");
2624   return V;
2625 }
2626
2628 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2629 LLVM_DEBUG(dbgs() << "     shuffle: " << *V << "\n");
2630 return V;
2631}
2632
2634 unsigned BeginIndex, const Twine &Name) {
2636 assert(VecTy && "Can only insert a vector into a vector");
2637
2638 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2639 if (!Ty) {
2640   // Single element to insert.
2641 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2642 Name + ".insert");
2643   LLVM_DEBUG(dbgs() << "      insert: " << *V << "\n");
2644   return V;
2645 }
2646
2649 "Too many elements!");
2652 assert(V->getType() == VecTy && "Vector type mismatch");
2653 return V;
2654 }
2655 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2656
2657
2658
2659
2660
2664 if (i >= BeginIndex && i < EndIndex)
2665 Mask.push_back(i - BeginIndex);
2666 else
2667 Mask.push_back(-1);
2668 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2670
2671 SmallVector<Constant *, 8> Mask2;
2672 Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2673 for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2674   Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2675
2676
2677 V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
2679
2681 return V;
2682}
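// Illustrative sketch: inserting a <2 x i32> at elements [1,3) of a <4 x i32>
// first widens the small vector with the shufflevector mask {-1, 0, 1, -1}
// (poison lanes outside the target range), then selects lanes
// {false, true, true, false} between the widened value and the old vector.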
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2709// Merge two fixed vectors into one by concatenating their elements with a
2710// shufflevector, bitcasting both to the new alloca's element type first.
2711// (Signature reconstructed; the original lines were lost in extraction.)
2712static Value *mergeTwoVectors(IRBuilderTy &Builder, Value *V0, Value *V1,
2713                              Type *NewAIEltTy, const DataLayout &DL) {
2714 auto *VecType0 = cast<FixedVectorType>(V0->getType());
2715 auto *VecType1 = cast<FixedVectorType>(V1->getType());
2716
2717 auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
2718                            const char *DebugName) {
2719 Type *EltType = VecType->getElementType();
2720 if (EltType != NewAIEltTy) {
2721
2722 unsigned TotalBits =
2723 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2724 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2725
2726     auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
2727     V = Builder.CreateBitCast(V, NewVecType);
2728 VecType = NewVecType;
2729 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2730 }
2731 };
2732
2733 BitcastIfNeeded(V0, VecType0, "V0");
2734 BitcastIfNeeded(V1, VecType1, "V1");
2735
2736 unsigned NumElts0 = VecType0->getNumElements();
2737 unsigned NumElts1 = VecType1->getNumElements();
2738
2739 SmallVector<int, 16> ShuffleMask;
2740
2741 if (NumElts0 == NumElts1) {
2742 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2743 ShuffleMask.push_back(i);
2744 } else {
2745
2746
2747 unsigned SmallSize = std::min(NumElts0, NumElts1);
2748 unsigned LargeSize = std::max(NumElts0, NumElts1);
2749 bool IsV0Smaller = NumElts0 < NumElts1;
2750 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2752 for (unsigned i = 0; i < SmallSize; ++i)
2754 for (unsigned i = SmallSize; i < LargeSize; ++i)
2756 ExtendedVec = Builder.CreateShuffleVector(
2758 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2759 for (unsigned i = 0; i < NumElts0; ++i)
2760 ShuffleMask.push_back(i);
2761 for (unsigned i = 0; i < NumElts1; ++i)
2762 ShuffleMask.push_back(LargeSize + i);
2763 }
2764
2765 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2766}
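// Illustrative example: merging <2 x float> with <4 x float> first widens the
// smaller operand using the mask {0, 1, -1, -1} (poison tail), then
// concatenates with the mask {0, 1, 4, 5, 6, 7}, producing a <6 x float>
// whose lanes are V0's elements followed by V1's.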
2767
2768namespace {
2769
2770
2771
2772
2773
2774
2775
2776class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2777
2778 friend class InstVisitor<AllocaSliceRewriter, bool>;
2779
2780 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2781
2782 const DataLayout &DL;
2783 AllocaSlices &AS;
2785 AllocaInst &OldAI, &NewAI;
2786 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2787 Type *NewAllocaTy;
2788
2789
2790
2791
2792
2793 IntegerType *IntTy;
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804 VectorType *VecTy;
2805 Type *ElementTy;
2806 uint64_t ElementSize;
2807
2808
2809
2810 uint64_t BeginOffset = 0;
2811 uint64_t EndOffset = 0;
2812
2813
2814
2815 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2816
2817 uint64_t SliceSize = 0;
2818 bool IsSplittable = false;
2819 bool IsSplit = false;
2820 Use *OldUse = nullptr;
2821 Instruction *OldPtr = nullptr;
2822
2823
2824 SmallSetVector<PHINode *, 8> &PHIUsers;
2825 SmallSetVector<SelectInst *, 8> &SelectUsers;
2826
2827
2828
2829 IRBuilderTy IRB;
2830
2831
2832
2833 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2835 return &NewAI;
2836
2837 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2838 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2839 }
2840
2841public:
2842 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2843 AllocaInst &OldAI, AllocaInst &NewAI,
2844 uint64_t NewAllocaBeginOffset,
2845 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2846 VectorType *PromotableVecTy,
2847 SmallSetVector<PHINode *, 8> &PHIUsers,
2848 SmallSetVector<SelectInst *, 8> &SelectUsers)
2849 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2850 NewAllocaBeginOffset(NewAllocaBeginOffset),
2851 NewAllocaEndOffset(NewAllocaEndOffset),
2852 NewAllocaTy(NewAI.getAllocatedType()),
2853 IntTy(
2854 IsIntegerPromotable
2856 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2857 .getFixedValue())
2858 : nullptr),
2859 VecTy(PromotableVecTy),
2860 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2861 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2862 : 0),
2863 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2864 IRB(NewAI.getContext(), ConstantFolder()) {
2865 if (VecTy) {
2866 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2867 "Only multiple-of-8 sized vector elements are viable");
2868 ++NumVectorized;
2869 }
2870 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2871 }
2872
2873 bool visit(AllocaSlices::const_iterator I) {
2874 bool CanSROA = true;
2875 BeginOffset = I->beginOffset();
2876 EndOffset = I->endOffset();
2877 IsSplittable = I->isSplittable();
2878 IsSplit =
2879 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2880 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2881   LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2882   LLVM_DEBUG(dbgs() << "\n");
2883
2884
2885 assert(BeginOffset < NewAllocaEndOffset);
2886 assert(EndOffset > NewAllocaBeginOffset);
2887 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2888 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2889
2890 SliceSize = NewEndOffset - NewBeginOffset;
2891 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2892 << ") NewBegin:(" << NewBeginOffset << ", "
2893 << NewEndOffset << ") NewAllocaBegin:("
2894 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2895 << ")\n");
2896 assert(IsSplit || NewBeginOffset == BeginOffset);
2897 OldUse = I->getUse();
2898   OldPtr = cast<Instruction>(OldUse->get());
2899
2900   Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2901 IRB.SetInsertPoint(OldUserI);
2902 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2903 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2904 Twine(BeginOffset) + ".");
2905
2906   CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2907   if (VecTy || IntTy)
2908     assert(CanSROA);
2909   return CanSROA;
2910 }
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950 std::optional<SmallVector<Value *, 4>>
2951 rewriteTreeStructuredMerge(Partition &P) {
2952
2953 if (P.splitSliceTails().size() > 0)
2954 return std::nullopt;
2955
2956   SmallVector<Value *, 4> DeletedValues;
2957   LoadInst *TheLoad = nullptr;
2958
2959
2960 struct StoreInfo {
2961 StoreInst *Store;
2962 uint64_t BeginOffset;
2963 uint64_t EndOffset;
2964 Value *StoredValue;
2965 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
2966 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2967 };
2968
2969   SmallVector<StoreInfo, 4> StoreInfos;
2970
2971
2972
2973 Type *AllocatedEltTy =
2974       isa<VectorType>(NewAllocaTy)
2975           ? cast<VectorType>(NewAllocaTy)->getElementType()
2976           : Type::getInt8Ty(NewAI.getContext());
2977 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
2978
2979
2980
2981
2982
2983
2984 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
2986 return FixedVecTy &&
2987 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2988 !FixedVecTy->getElementType()->isPointerTy();
2989 };
2990
2991 for (Slice &S : P) {
2992     auto *User = S.getUse()->getUser();
2993     if (auto *LI = dyn_cast<LoadInst>(User)) {
2994       // The only load must read the whole alloca as a fixed vector.
2995
2996
2997
2998
2999 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
3000 S.beginOffset() != NewAllocaBeginOffset ||
3001 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
3002 return std::nullopt;
3003 TheLoad = LI;
3004     } else if (auto *SI = dyn_cast<StoreInst>(User)) {
3005       // Stores must be non-volatile fixed-vector stores whose sizes line
3006
3007
3008
3009
3010 if (!IsTypeValidForTreeStructuredMerge(
3011 SI->getValueOperand()->getType()) ||
3012 SI->isVolatile())
3013 return std::nullopt;
3014       auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
3015       unsigned NumElts = VecTy->getNumElements();
3016 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
3017 if (NumElts * EltSize % AllocatedEltTySize != 0)
3018 return std::nullopt;
3019 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
3020 SI->getValueOperand());
3021 } else {
3022
3023
3024 return std::nullopt;
3025 }
3026 }
3027
3028 if (!TheLoad)
3029 return std::nullopt;
3030
3031
3032 if (StoreInfos.size() < 2)
3033 return std::nullopt;
3034
3035
3036
3037 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
3038 return A.BeginOffset < B.BeginOffset;
3039 });
3040
3041
3042 uint64_t ExpectedStart = NewAllocaBeginOffset;
3043 for (auto &StoreInfo : StoreInfos) {
3044 uint64_t BeginOff = StoreInfo.BeginOffset;
3045 uint64_t EndOff = StoreInfo.EndOffset;
3046
3047
3048 if (BeginOff != ExpectedStart)
3049 return std::nullopt;
3050
3051 ExpectedStart = EndOff;
3052 }
3053
3054 if (ExpectedStart != NewAllocaEndOffset)
3055 return std::nullopt;
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065   BasicBlock *LoadBB = TheLoad->getParent();
3066   BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3067
3068 for (auto &StoreInfo : StoreInfos) {
3069 if (StoreInfo.Store->getParent() != StoreBB)
3070 return std::nullopt;
3071 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3072 return std::nullopt;
3073 }
3074
3075
3076
3078 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
3079 << "\n Ordered stores:\n";
3081 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
3082 << Info.EndOffset << ") \tStore: " << *Info.Store
3083 << "\tValue: " << *Info.StoredValue << "\n";
3084 });
3085
3086
3087
3088 std::queue<Value *> VecElements;
3089 IRBuilder<> Builder(StoreInfos.back().Store);
3090 for (const auto &Info : StoreInfos) {
3091     DeletedValues.push_back(Info.Store);
3092     VecElements.push(Info.StoredValue);
3093 }
3094
3095 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
3096 while (VecElements.size() > 1) {
3097 const auto NumElts = VecElements.size();
3098 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3099 Value *V0 = VecElements.front();
3100 VecElements.pop();
3101 Value *V1 = VecElements.front();
3102 VecElements.pop();
3104 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3105 VecElements.push(Merged);
3106 }
3107 if (NumElts % 2 == 1) {
3108 Value *V = VecElements.front();
3109 VecElements.pop();
3110 VecElements.push(V);
3111 }
3112 }
3113
3114
3115 Value *MergedValue = VecElements.front();
3116 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3117
3120 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3121 TheLoad->getName() + ".sroa.new.load"));
3122 DeletedValues.push_back(TheLoad);
3123
3124 return DeletedValues;
3125 }
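// Illustrative example: four <4 x float> stores covering bytes [0,16),
// [16,32), [32,48) and [48,64) of a 64-byte alloca followed by a single
// <16 x float> load of the whole alloca become two shufflevectors merging
// adjacent pairs into <8 x float>, one more merging those into <16 x float>,
// a single store of the merged value, and a reload that replaces the
// original load.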
3126
3127private:
3128
3129 using Base::visit;
3130
3131
3132 bool visitInstruction(Instruction &I) {
3133 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3134   llvm_unreachable("No rewrite rule for this instruction!");
3135 }
3136
3137 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3138   // Compute the adjusted pointer into the new alloca, preserving as much
3139   // of the original pointer's name as possible.
3140 assert(IsSplit || BeginOffset == NewBeginOffset);
3141 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3142
3143 StringRef OldName = OldPtr->getName();
3144
3145 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3146   if (LastSROAPrefix != StringRef::npos) {
3147     OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3148
3149     size_t IndexEnd = OldName.find_first_not_of("0123456789");
3150     if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3151
3152 OldName = OldName.substr(IndexEnd + 1);
3153       size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3154       if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3155
3156 OldName = OldName.substr(OffsetEnd + 1);
3157 }
3158 }
3159
3160 OldName = OldName.substr(0, OldName.find(".sroa_"));
3161
3162   return getAdjustedPtr(IRB, DL, &NewAI,
3163                         APInt(DL.getIndexTypeSizeInBits(NewAI.getType()), Offset),
3164                         PointerTy, Twine(OldName) + ".");
3165 }
3166
3167
3168
3169
3170
3171
3172 Align getSliceAlign() {
3173   return commonAlignment(NewAI.getAlign(),
3174                          NewBeginOffset - NewAllocaBeginOffset);
3175 }
3176
3177 unsigned getIndex(uint64_t Offset) {
3178 assert(VecTy && "Can only call getIndex when rewriting a vector");
3179 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3180 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3181 uint32_t Index = RelOffset / ElementSize;
3182 assert(Index * ElementSize == RelOffset);
3183   return Index;
3184 }
3185
3186 void deleteIfTriviallyDead(Value *V) {
3187   Instruction *I = cast<Instruction>(V);
3188   if (isInstructionTriviallyDead(I))
3189     Pass.DeadInsts.push_back(I);
3190 }
3191
3192 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3193 unsigned BeginIndex = getIndex(NewBeginOffset);
3194 unsigned EndIndex = getIndex(NewEndOffset);
3195 assert(EndIndex > BeginIndex && "Empty vector!");
3196
3197   LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3198                                          NewAI.getAlign(), "load");
3199
3200 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3201 LLVMContext::MD_access_group});
3202 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3203 }
3204
3205 Value *rewriteIntegerLoad(LoadInst &LI) {
3206 assert(IntTy && "We cannot insert an integer to the alloca");
3207   assert(!LI.isVolatile());
3208   Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3209                                    NewAI.getAlign(), "load");
3210   V = convertValue(DL, IRB, V, IntTy);
3211   assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3212 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3213 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3214 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3215 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
3216 }
3217
3218
3219
3220
3221
3223 "Can only handle an extract for an overly wide load");
3225 V = IRB.CreateZExt(V, LI.getType());
3226 return V;
3227 }
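// Illustrative rewrite (offsets hypothetical, little-endian layout): an i16
// load at byte offset 2 of an i64-backed slice becomes
//   %load = load i64, ptr %alloca.sroa.0
//   %shift = lshr i64 %load, 16
//   %extract = trunc i64 %shift to i16
// with extractInteger deriving the shift amount from the slice offset.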
3228
3229 bool visitLoadInst(LoadInst &LI) {
3230 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3231 Value *OldOp = LI.getOperand(0);
3232 assert(OldOp == OldPtr);
3233
3235
3237
3238 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3240 bool IsPtrAdjusted = false;
3242 if (VecTy) {
3243 V = rewriteVectorizedLoadInst(LI);
3245 V = rewriteIntegerLoad(LI);
3246 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3247 NewEndOffset == NewAllocaEndOffset &&
3250 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3252 Value *NewPtr =
3253 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3254 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
3255 NewAI.getAlign(), LI.isVolatile(),
3256 LI.getName());
3257 if (LI.isVolatile())
3258 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3259 if (NewLI->isAtomic())
3260 NewLI->setAlignment(LI.getAlign());
3261
3262
3263
3264
3265 copyMetadataForLoad(*NewLI, LI);
3266
3267
3268 if (AATags)
3269 NewLI->setAAMetadata(AATags.adjustForAccess(
3270 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3271
3272
3273 V = NewLI;
3274
3275
3276
3277
3278 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3279 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3280 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3281 V = IRB.CreateZExt(V, TITy, "load.ext");
3282 if (DL.isBigEndian())
3283 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3284 "endian_shift");
3285 }
3286 } else {
3287 Type *LTy = IRB.getPtrTy(AS);
3288 LoadInst *NewLI =
3289 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3291
3292 if (AATags)
3294 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3295
3298 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3299 LLVMContext::MD_access_group});
3300
3301 V = NewLI;
3302 IsPtrAdjusted = true;
3303 }
3305
3306 if (IsSplit) {
3309 "Only integer type loads and stores are split");
3310 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3311 "Split load isn't smaller than original load");
3313 "Non-byte-multiple bit width");
3314
3316
3317
3318
3319 LIIt.setHeadBit(true);
3320 IRB.SetInsertPoint(LI.getParent(), LIIt);
3321
3322
3323
3324
3325 Value *Placeholder =
3327 false, Align(1));
3328 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3329 "insert");
3331 Placeholder->replaceAllUsesWith(&LI);
3332 Placeholder->deleteValue();
3333 } else {
3335 }
3336
3337 Pass.DeadInsts.push_back(&LI);
3338 deleteIfTriviallyDead(OldOp);
3340 return !LI.isVolatile() && !IsPtrAdjusted;
3341 }
3342
3343 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3344 AAMDNodes AATags) {
3345
3346
3348 if (V->getType() != VecTy) {
3349 unsigned BeginIndex = getIndex(NewBeginOffset);
3350 unsigned EndIndex = getIndex(NewEndOffset);
3351 assert(EndIndex > BeginIndex && "Empty vector!");
3352 unsigned NumElements = EndIndex - BeginIndex;
3354 "Too many elements!");
3355 Type *SliceTy = (NumElements == 1)
3356 ? ElementTy
3357 : FixedVectorType::get(ElementTy, NumElements);
3358 if (V->getType() != SliceTy)
3360
3361
3364 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3365 }
3366 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3367 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3368 LLVMContext::MD_access_group});
3369 if (AATags)
3372 Pass.DeadInsts.push_back(&SI);
3373
3374
3375 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3376 Store, Store->getPointerOperand(), OrigV, DL);
3378 return true;
3379 }
3380
3381 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3382 assert(IntTy && "We cannot extract an integer from the alloca");
3384 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3387 NewAI.getAlign(), "oldload");
3389 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3390 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3392 }
3394 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3395 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3396 LLVMContext::MD_access_group});
3397 if (AATags)
3400
3401 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3402 Store, Store->getPointerOperand(),
3403 Store->getValueOperand(), DL);
3404
3405 Pass.DeadInsts.push_back(&SI);
3407 return true;
3408 }
3409
3410 bool visitStoreInst(StoreInst &SI) {
3412 Value *OldOp = SI.getOperand(1);
3413 assert(OldOp == OldPtr);
3414
3415 AAMDNodes AATags = SI.getAAMetadata();
3416 Value *V = SI.getValueOperand();
3417
3418
3419
3420 if (V->getType()->isPointerTy())
3422 Pass.PostPromotionWorklist.insert(AI);
3423
3424 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3427 assert(V->getType()->isIntegerTy() &&
3428 "Only integer type loads and stores are split");
3429 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3430 "Non-byte-multiple bit width");
3431 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3432 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3433 "extract");
3434 }
3435
3436 if (VecTy)
3437 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3438 if (IntTy && V->getType()->isIntegerTy())
3439 return rewriteIntegerStore(V, SI, AATags);
3440
3441 StoreInst *NewSI;
3442 if (NewBeginOffset == NewAllocaBeginOffset &&
3443 NewEndOffset == NewAllocaEndOffset &&
3447 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3448
3449 NewSI =
3450 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3451 } else {
3452 unsigned AS = SI.getPointerAddressSpace();
3453 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3454 NewSI =
3455 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3456 }
3457 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3458 LLVMContext::MD_access_group});
3459 if (AATags)
3462 if (SI.isVolatile())
3463 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3466
3467 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3470
3471 Pass.DeadInsts.push_back(&SI);
3472 deleteIfTriviallyDead(OldOp);
3473
3475 return NewSI->getPointerOperand() == &NewAI &&
3476 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3477 !SI.isVolatile();
3478 }
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489 Value *getIntegerSplat(Value *V, unsigned Size) {
3490 assert(Size > 0 && "Expected a positive number of bytes.");
3491 IntegerType *VTy = cast<IntegerType>(V->getType());
3492 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3493 if (Size == 1)
3494 return V;
3495
3496 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3497 V = IRB.CreateMul(
3498 IRB.CreateZExt(V, SplatIntTy, "zext"),
3499 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3500 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3501 SplatIntTy)),
3502 "isplat");
3503 return V;
3504 }
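// Worked example: splatting the i8 value 0xAB across Size == 4 bytes
// computes zext(0xAB) * (0xFFFFFFFF u/ 0xFF) == 0xAB * 0x01010101
// == 0xABABABAB, the classic multiply-based byte broadcast.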
3505
3506
3507 Value *getVectorSplat(Value *V, unsigned NumElements) {
3508 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3509 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3510 return V;
3511 }
3512
3513 bool visitMemSetInst(MemSetInst &II) {
3515 assert(II.getRawDest() == OldPtr);
3516
3517 AAMDNodes AATags = II.getAAMetadata();
3518
3519
3520
3523 assert(NewBeginOffset == BeginOffset);
3524 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3525 II.setDestAlignment(getSliceAlign());
3526
3527
3528
3530 "AT: Unexpected link to non-const GEP");
3531 deleteIfTriviallyDead(OldPtr);
3532 return false;
3533 }
3534
3535
3536 Pass.DeadInsts.push_back(&II);
3537
3540
3541 const bool CanContinue = [&]() {
3542 if (VecTy || IntTy)
3543 return true;
3544 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3545 return false;
3546
3548 const uint64_t Len = C->getLimitedValue();
3549 if (Len > std::numeric_limits::max())
3550 return false;
3551 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3554 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3555 }();
3556
3557
3558
3559 if (!CanContinue) {
3560 Type *SizeTy = II.getLength()->getType();
3561 unsigned Sz = NewEndOffset - NewBeginOffset;
3562 Constant *Size = ConstantInt::get(SizeTy, Sz);
3564 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3565 MaybeAlign(getSliceAlign()), II.isVolatile()));
3566 if (AATags)
3567 New->setAAMetadata(
3568 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3569
3570 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3571 New, New->getRawDest(), nullptr, DL);
3572
3574 return false;
3575 }
3576
3577
3578
3579
3580
3581
3583
3584 if (VecTy) {
3585
3586 assert(ElementTy == ScalarTy);
3587
3588 unsigned BeginIndex = getIndex(NewBeginOffset);
3589 unsigned EndIndex = getIndex(NewEndOffset);
3590 assert(EndIndex > BeginIndex && "Empty vector!");
3591 unsigned NumElements = EndIndex - BeginIndex;
3593 "Too many elements!");
3594
3596 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3598 if (NumElements > 1)
3600
3602 NewAI.getAlign(), "oldload");
3604 } else if (IntTy) {
3605
3606
3608
3609 uint64_t Size = NewEndOffset - NewBeginOffset;
3610 V = getIntegerSplat(II.getValue(), Size);
3611
3612 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3613 EndOffset != NewAllocaBeginOffset)) {
3615 NewAI.getAlign(), "oldload");
3617 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3619 } else {
3620 assert(V->getType() == IntTy &&
3621 "Wrong type for an alloca wide integer!");
3622 }
3624 } else {
3625
3626 assert(NewBeginOffset == NewAllocaBeginOffset);
3627 assert(NewEndOffset == NewAllocaEndOffset);
3628
3629 V = getIntegerSplat(II.getValue(),
3630 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3634
3636 }
3637
3638 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3639 StoreInst *New =
3640 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3641 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3642 LLVMContext::MD_access_group});
3643 if (AATags)
3644 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3646
3647 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3648 New, New->getPointerOperand(), V, DL);
3649
3651 return !II.isVolatile();
3652 }
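// Illustrative rewrite (not from a test case): once the splat store above is
// emitted, a memset covering a promotable i32-wide slice such as
//   call void @llvm.memset.p0.i64(ptr %slice, i8 -86, i64 4, i1 false)
// becomes a plain store of the splatted integer:
//   store i32 -1431655766, ptr %alloca.sroa.0   ; 0xAAAAAAAA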
3653
3654 bool visitMemTransferInst(MemTransferInst &II) {
3655
3656
3657
3659
3660 AAMDNodes AATags = II.getAAMetadata();
3661
3662 bool IsDest = &II.getRawDestUse() == OldUse;
3663 assert((IsDest && II.getRawDest() == OldPtr) ||
3664 (!IsDest && II.getRawSource() == OldPtr));
3665
3666 Align SliceAlign = getSliceAlign();
3667
3668
3669
3670
3671
3672
3673
3674 if (!IsSplittable) {
3675 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3676 if (IsDest) {
3677
3680 DbgAssign->getAddress() == II.getDest())
3681 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3682 }
3683 II.setDest(AdjustedPtr);
3684 II.setDestAlignment(SliceAlign);
3685 } else {
3686 II.setSource(AdjustedPtr);
3687 II.setSourceAlignment(SliceAlign);
3688 }
3689
3691 deleteIfTriviallyDead(OldPtr);
3692 return false;
3693 }
3694
3695
3696
3697
3698
3699
3700
3701
3702 bool EmitMemCpy =
3703 !VecTy && !IntTy &&
3704 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3705 SliceSize !=
3709
3710
3711
3712
3713 if (EmitMemCpy && &OldAI == &NewAI) {
3714
3715 assert(NewBeginOffset == BeginOffset);
3716
3717
3718 if (NewEndOffset != EndOffset)
3719 II.setLength(NewEndOffset - NewBeginOffset);
3720 return false;
3721 }
3722
3723 Pass.DeadInsts.push_back(&II);
3724
3725
3726
3727 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3728 if (AllocaInst *AI =
3730 assert(AI != &OldAI && AI != &NewAI &&
3731 "Splittable transfers cannot reach the same alloca on both ends.");
3732 Pass.Worklist.insert(AI);
3733 }
3734
3737
3738
3739 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3740 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3741 Align OtherAlign =
3742 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3743 OtherAlign =
3744 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3745
3746 if (EmitMemCpy) {
3747
3748
3749 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3750 OtherPtr->getName() + ".");
3751
3752 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3753 Type *SizeTy = II.getLength()->getType();
3754 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3755
3756 Value *DestPtr, *SrcPtr;
3757 MaybeAlign DestAlign, SrcAlign;
3758
3759 if (IsDest) {
3760 DestPtr = OurPtr;
3761 DestAlign = SliceAlign;
3762 SrcPtr = OtherPtr;
3763 SrcAlign = OtherAlign;
3764 } else {
3765 DestPtr = OtherPtr;
3766 DestAlign = OtherAlign;
3767 SrcPtr = OurPtr;
3768 SrcAlign = SliceAlign;
3769 }
3770 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3772 if (AATags)
3773 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3774
3775 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3776 if (IsDest) {
3777 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3778 &II, New, DestPtr, nullptr, DL);
3783 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3784 }
3786 return false;
3787 }
3788
3789 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3790 NewEndOffset == NewAllocaEndOffset;
3791 uint64_t Size = NewEndOffset - NewBeginOffset;
3792 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3793 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3794 unsigned NumElements = EndIndex - BeginIndex;
3795 IntegerType *SubIntTy =
3796 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3797
3798
3799
3800 Type *OtherTy;
3801 if (VecTy && !IsWholeAlloca) {
3802 if (NumElements == 1)
3803 OtherTy = VecTy->getElementType();
3804 else
3806 } else if (IntTy && !IsWholeAlloca) {
3807 OtherTy = SubIntTy;
3808 } else {
3809 OtherTy = NewAllocaTy;
3810 }
3811
3813 OtherPtr->getName() + ".");
3814 MaybeAlign SrcAlign = OtherAlign;
3815 MaybeAlign DstAlign = SliceAlign;
3816 if (!IsDest)
3817 std::swap(SrcAlign, DstAlign);
3818
3819 Value *SrcPtr;
3820 Value *DstPtr;
3821
3822 if (IsDest) {
3823 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3824 SrcPtr = AdjPtr;
3825 } else {
3826 DstPtr = AdjPtr;
3827 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3828 }
3829
3830 Value *Src;
3831 if (VecTy && !IsWholeAlloca && !IsDest) {
3832 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3834 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3835 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3836 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
3839 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3841 } else {
3842 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3843 II.isVolatile(), "copyload");
3844 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3845 LLVMContext::MD_access_group});
3846 if (AATags)
3847 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3850 }
3851
3852 if (VecTy && !IsWholeAlloca && IsDest) {
3854 NewAI.getAlign(), "oldload");
3855 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3856 } else if (IntTy && !IsWholeAlloca && IsDest) {
3858 NewAI.getAlign(), "oldload");
3860 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3863 }
3864
3865 StoreInst *Store = cast<StoreInst>(
3866 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3867 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3868 LLVMContext::MD_access_group});
3869 if (AATags)
3871 Src->getType(), DL));
3872
3873 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3874 if (IsDest) {
3875
3876 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3877 Store, DstPtr, Src, DL);
3882 &II, Store, DstPtr, Src, DL);
3883 }
3884
3886 return !II.isVolatile();
3887 }
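// Illustrative rewrite (names hypothetical): when one side of a memcpy is
// the rewritten slice and the sizes line up, the transfer becomes a scalar
// copy that later promotion can see through:
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %slice, i64 8, i1 false)
// turns into
//   %copyload = load i64, ptr %alloca.sroa.0
//   store i64 %copyload, ptr %dst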
3888
3889 bool visitIntrinsicInst(IntrinsicInst &II) {
3890 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3891 "Unexpected intrinsic!");
3893
3894
3895 Pass.DeadInsts.push_back(&II);
3896
3897 if (II.isDroppable()) {
3898 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3899
3901 return true;
3902 }
3903
3904 assert(II.getArgOperand(0) == OldPtr);
3908 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3909 New = IRB.CreateLifetimeStart(Ptr);
3910 else
3911 New = IRB.CreateLifetimeEnd(Ptr);
3912
3913 (void)New;
3915
3916 return true;
3917 }
3918
3919 void fixLoadStoreAlign(Instruction &Root) {
3920
3921
3922
3923 SmallPtrSet<Instruction *, 4> Visited;
3924 SmallVector<Instruction *, 4> Uses;
3925 Visited.insert(&Root);
3926 Uses.push_back(&Root);
3927 do {
3929
3932 continue;
3933 }
3935 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3936 continue;
3937 }
3938
3942 for (User *U : I->users())
3945 } while (!Uses.empty());
3946 }
3947
3948 bool visitPHINode(PHINode &PN) {
3950 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3951 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3952
3953
3954
3955
3956
3957 IRBuilderBase::InsertPointGuard Guard(IRB);
3959 IRB.SetInsertPoint(OldPtr->getParent(),
3960 OldPtr->getParent()->getFirstInsertionPt());
3961 else
3962 IRB.SetInsertPoint(OldPtr);
3963 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3964
3965 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3966
3968
3970 deleteIfTriviallyDead(OldPtr);
3971
3972
3973 fixLoadStoreAlign(PN);
3974
3975
3976
3977
3978 PHIUsers.insert(&PN);
3979 return true;
3980 }
3981
3982 bool visitSelectInst(SelectInst &SI) {
3984 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3985 "Pointer isn't an operand!");
3986 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3987 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3988
3989 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3990
3991 if (SI.getOperand(1) == OldPtr)
3992 SI.setOperand(1, NewPtr);
3993 if (SI.getOperand(2) == OldPtr)
3994 SI.setOperand(2, NewPtr);
3995
3997 deleteIfTriviallyDead(OldPtr);
3998
3999
4000 fixLoadStoreAlign(SI);
4001
4002
4003
4004
4005 SelectUsers.insert(&SI);
4006 return true;
4007 }
4008};
4009
4010
4011
4012
4013
4014
4015class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
4016
4017 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4018
4019
4021
4022
4023 SmallPtrSet<User *, 8> Visited;
4024
4025
4026
4028
4029
4030 const DataLayout &DL;
4031
4032 IRBuilderTy &IRB;
4033
4034public:
4035 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
4037
4038
4039
4040 bool rewrite(Instruction &I) {
4041 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
4042 enqueueUsers(I);
4043 bool Changed = false;
4044 while (!Queue.empty()) {
4045 U = Queue.pop_back_val();
4046 Changed |= visit(cast<Instruction>(U->getUser()));
4047 }
4048 return Changed;
4049 }
4050
4051private:
4052
4053
4054 void enqueueUsers(Instruction &I) {
4055 for (Use &U : I.uses())
4056 if (Visited.insert(U.getUser()).second)
4057 Queue.push_back(&U);
4058 }
4059
4060
4061 bool visitInstruction(Instruction &I) { return false; }
4062
4063
4064 template class OpSplitter {
4065 protected:
4066
4067 IRBuilderTy &IRB;
4068
4069
4070
4071 SmallVector<unsigned, 4> Indices;
4072
4073
4074
4076
4077
4078
4080
4081
4082 Type *BaseTy;
4083
4084
4085 Align BaseAlign;
4086
4087
4088
4089 const DataLayout &DL;
4090
4091
4092
4093 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4094 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4095 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4096 BaseAlign(BaseAlign), DL(DL) {
4097 IRB.SetInsertPoint(InsertionPoint);
4098 }
4099
4100 public:
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4116 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4117 return static_cast<Derived *>(this)->emitFunc(
4119 }
4120
4122 unsigned OldSize = Indices.size();
4123 (void)OldSize;
4124 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4125 ++Idx) {
4126 assert(Indices.size() == OldSize && "Did not return to the old size");
4128 GEPIndices.push_back(IRB.getInt32(Idx));
4129 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4132 }
4133 return;
4134 }
4135
4137 unsigned OldSize = Indices.size();
4138 (void)OldSize;
4139 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4140 ++Idx) {
4141 assert(Indices.size() == OldSize && "Did not return to the old size");
4143 GEPIndices.push_back(IRB.getInt32(Idx));
4144 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4147 }
4148 return;
4149 }
4150
4151 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4152 }
4153 };
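// Illustrative expansion (names hypothetical): for a two-field struct,
// LoadOpSplitter below rewrites
//   %v = load { i32, i64 }, ptr %p
// into per-field operations stitched back together with insertvalue:
//   %v.fca.0.gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 0
//   %v.fca.0.load = load i32, ptr %v.fca.0.gep
//   %v.fca.0.insert = insertvalue { i32, i64 } poison, i32 %v.fca.0.load, 0
//   %v.fca.1.gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
//   %v.fca.1.load = load i64, ptr %v.fca.1.gep
//   %v.fca.1.insert = insertvalue { i32, i64 } %v.fca.0.insert, i64 %v.fca.1.load, 1
// StoreOpSplitter performs the mirror-image extractvalue/store expansion.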
4154
4155 struct LoadOpSplitter : public OpSplitter {
4156 AAMDNodes AATags;
4157
4158
4160
4161
4163
4164 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4165 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4166 IRBuilderTy &IRB)
4167 : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4168 IRB),
4169 AATags(AATags) {}
4170
4171
4172
4173 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4175
4177 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4178 LoadInst *Load =
4179 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4180
4183 if (AATags &&
4185 Load->setAAMetadata(
4187
4188
4190
4191 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4193 }
4194
4195
4196 void recordFakeUses(LoadInst &LI) {
4197 for (Use &U : LI.uses())
4199 if (II->getIntrinsicID() == Intrinsic::fake_use)
4201 }
4202
4203
4204
4205 void emitFakeUses() {
4206 for (Instruction *I : FakeUses) {
4207 IRB.SetInsertPoint(I);
4208 for (auto *V : Components)
4209 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4210 I->eraseFromParent();
4211 }
4212 }
4213 };
4214
4215 bool visitLoadInst(LoadInst &LI) {
4218 return false;
4219
4220
4224 Splitter.recordFakeUses(LI);
4226 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4227 Splitter.emitFakeUses();
4228 Visited.erase(&LI);
4231 return true;
4232 }
4233
4234 struct StoreOpSplitter : public OpSplitter {
4235 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4236 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4237 const DataLayout &DL, IRBuilderTy &IRB)
4238 : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign,
4239 DL, IRB),
4240 AATags(AATags), AggStore(AggStore) {}
4241 AAMDNodes AATags;
4242 StoreInst *AggStore;
4243
4244
4245 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4247
4248
4249
4250
4251 Value *ExtractValue =
4252 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4253 Value *InBoundsGEP =
4254 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4255 StoreInst *Store =
4256 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4257
4261 if (AATags) {
4264 }
4265
4266
4267
4268
4269
4272 uint64_t SizeInBits =
4273 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4275 SizeInBits, AggStore, Store,
4276 Store->getPointerOperand(), Store->getValueOperand(),
4277 DL);
4278 } else {
4280 "AT: unexpected debug.assign linked to store through "
4281 "unbounded GEP");
4282 }
4284 }
4285 };
4286
4287 bool visitStoreInst(StoreInst &SI) {
4288 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4289 return false;
4290 Value *V = SI.getValueOperand();
4291 if (V->getType()->isSingleValueType())
4292 return false;
4293
4294
4296 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4298 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4299 Visited.erase(&SI);
4300
4301
4303 SI.eraseFromParent();
4304 return true;
4305 }
4306
4307 bool visitBitCastInst(BitCastInst &BC) {
4308 enqueueUsers(BC);
4309 return false;
4310 }
4311
4312 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4313 enqueueUsers(ASC);
4314 return false;
4315 }
4316
4317
4318
4319
4320
4321
4322 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4323
4324
4328 if (Sel)
4329 return false;
4330
4331 Sel = SI;
4334 return false;
4335 continue;
4336 }
4338 if (Sel)
4339 return false;
4340 Sel = ZI;
4341 if (!ZI->getSrcTy()->isIntegerTy(1))
4342 return false;
4343 continue;
4344 }
4345
4347 return false;
4348 }
4349
4350 if (!Sel)
4351 return false;
4352
4353 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4354 dbgs() << " original: " << *Sel << "\n";
4355 dbgs() << " " << GEPI << "\n";);
4356
4357 auto GetNewOps = [&](Value *SelOp) {
4360 if (Op == Sel)
4362 else
4364 return NewOps;
4365 };
4366
4370 Cond = SI->getCondition();
4371 True = SI->getTrueValue();
4372 False = SI->getFalseValue();
4374 MDFrom = SI;
4375 } else {
4376 Cond = Sel->getOperand(0);
4377 True = ConstantInt::get(Sel->getType(), 1);
4378 False = ConstantInt::get(Sel->getType(), 0);
4379 }
4382
4383 IRB.SetInsertPoint(&GEPI);
4385
4387 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4388 True->getName() + ".sroa.gep", NW);
4389
4391 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4392 False->getName() + ".sroa.gep", NW);
4393
4394 Value *NSel = MDFrom
4395 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4396 Sel->getName() + ".sroa.sel", MDFrom)
4397 : IRB.CreateSelectWithUnknownProfile(
4399 Sel->getName() + ".sroa.sel");
4400 Visited.erase(&GEPI);
4404 Visited.insert(NSelI);
4405 enqueueUsers(*NSelI);
4406
4408 dbgs() << " " << *NFalse << "\n";
4409 dbgs() << " " << *NSel << "\n";);
4410
4411 return true;
4412 }
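// Illustrative before/after (names hypothetical):
//   %sel = select i1 %c, ptr %a, ptr %b
//   %gep = getelementptr i32, ptr %sel, i64 1
// is rewritten to
//   %a.sroa.gep = getelementptr i32, ptr %a, i64 1
//   %b.sroa.gep = getelementptr i32, ptr %b, i64 1
//   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep
// so each arm addresses its alloca directly and stays sliceable.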
4413
4414
4415
4416
4417
4418 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4419
4420
4421
4423 auto IsInvalidPointerOperand = [](Value *V) {
4424 if (!isa<Instruction>(V))
4425 return false;
4426 if (auto *AI = dyn_cast<AllocaInst>(V))
4427 return !AI->isStaticAlloca();
4428 return true;
4429 };
4430 if (Phi) {
4431 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4432 return false;
4433 } else {
4435 return false;
4436 }
4437
4438
4441 if (Phi)
4442 return false;
4443
4445 if ((Phi->incoming_values(),
4446 [](Value *V) { return isa(V); }))
4447 return false;
4448 continue;
4449 }
4450
4452 return false;
4453 }
4454
4455 if (!Phi)
4456 return false;
4457
4458 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4459 dbgs() << " original: " << *Phi << "\n";
4460 dbgs() << " " << GEPI << "\n";);
4461
4462 auto GetNewOps = [&](Value *PhiOp) {
4465 if (Op == Phi)
4467 else
4469 return NewOps;
4470 };
4471
4472 IRB.SetInsertPoint(Phi);
4473 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4474 Phi->getName() + ".sroa.phi");
4475
4477
4478
4480 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4486 } else {
4488 NewGEP =
4489 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4491 }
4493 }
4494
4495 Visited.erase(&GEPI);
4498 Visited.insert(NewPhi);
4499 enqueueUsers(*NewPhi);
4500
4504 << "\n " << *In;
4505 dbgs() << "\n " << *NewPhi << '\n');
4506
4507 return true;
4508 }
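// Illustrative before/after (names hypothetical):
//   %phi = phi ptr [ %a, %then ], [ %b, %else ]
//   %gep = getelementptr i32, ptr %phi, i64 2
// is rewritten, with one GEP hoisted into each predecessor, to
//   %a.sroa.gep = getelementptr i32, ptr %a, i64 2     ; in %then
//   %b.sroa.gep = getelementptr i32, ptr %b, i64 2     ; in %else
//   %phi.sroa.phi = phi ptr [ %a.sroa.gep, %then ], [ %b.sroa.gep, %else ]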
4509
4510 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4511 if (unfoldGEPSelect(GEPI))
4512 return true;
4513
4514 if (unfoldGEPPhi(GEPI))
4515 return true;
4516
4517 enqueueUsers(GEPI);
4518 return false;
4519 }
4520
4521 bool visitPHINode(PHINode &PN) {
4522 enqueueUsers(PN);
4523 return false;
4524 }
4525
4526 bool visitSelectInst(SelectInst &SI) {
4527 enqueueUsers(SI);
4528 return false;
4529 }
4530};
4531
4532}
4533
4534
4535
4536
4537
4538
4540 if (Ty->isSingleValueType())
4541 return Ty;
4542
4543 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4545
4546 Type *InnerTy;
4548 InnerTy = ArrTy->getElementType();
4552 InnerTy = STy->getElementType(Index);
4553 } else {
4554 return Ty;
4555 }
4556
4557 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4558 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4559 return Ty;
4560
4562}
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4579 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4581 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4582 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4583 return nullptr;
4584
4586 Type *ElementTy;
4589 ElementTy = AT->getElementType();
4590 TyNumElements = AT->getNumElements();
4591 } else {
4592
4593
4595 ElementTy = VT->getElementType();
4596 TyNumElements = VT->getNumElements();
4597 }
4598 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4600 if (NumSkippedElements >= TyNumElements)
4601 return nullptr;
4602 Offset -= NumSkippedElements * ElementSize;
4603
4604
4605 if (Offset > 0 || Size < ElementSize) {
4606
4608 return nullptr;
4609
4611 }
4613
4614 if (Size == ElementSize)
4618 if (NumElements * ElementSize != Size)
4619 return nullptr;
4621 }
4622
4624 if (!STy)
4625 return nullptr;
4626
4628
4630 return nullptr;
4631
4633 return nullptr;
4636 return nullptr;
4637
4640
4642 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4643 if (Offset >= ElementSize)
4644 return nullptr;
4645
4646
4647 if (Offset > 0 || Size < ElementSize) {
4649 return nullptr;
4651 }
4653
4654 if (Size == ElementSize)
4656
4661 if (Index == EndIndex)
4662 return nullptr;
4663
4664
4665
4666
4667
4669 return nullptr;
4670
4671 assert(Index < EndIndex);
4673 }
4674
4675
4678 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4680 return nullptr;
4681
4682 return SubTy;
4683}
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4711 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4712
4713
4714
4715
4716
4719
4720
4721
4722
4723
4724 struct SplitOffsets {
4725 Slice *S;
4726 std::vector<uint64_t> Splits;
4727 };
4728 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4742
4743 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4744 for (auto &P : AS.partitions()) {
4745 for (Slice &S : P) {
4747 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4748
4749
4750
4752 UnsplittableLoads.insert(LI);
4755 UnsplittableLoads.insert(LI);
4756 continue;
4757 }
4758 assert(P.endOffset() > S.beginOffset() &&
4759 "Empty or backwards partition!");
4760
4761
4763 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4764
4765
4766
4767
4768 auto IsLoadSimplyStored = [](LoadInst *LI) {
4769 for (User *LU : LI->users()) {
4770 StoreInst *SI = dyn_cast<StoreInst>(LU);
4771 if (!SI || !SI->isSimple())
4772 return false;
4773 }
4774 return true;
4775 };
4776 if (!IsLoadSimplyStored(LI)) {
4777 UnsplittableLoads.insert(LI);
4778 continue;
4779 }
4780
4783 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4784
4785 continue;
4787 if (!StoredLoad || !StoredLoad->isSimple())
4788 continue;
4789 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4790
4792 } else {
4793
4794 continue;
4795 }
4796
4797
4799 auto &Offsets = SplitOffsetsMap[I];
4801 "Should not have splits the first time we see an instruction!");
4803 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4804 }
4805
4806
4807
4808 for (Slice *S : P.splitSliceTails()) {
4809 auto SplitOffsetsMapI =
4811 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4812 continue;
4813 auto &Offsets = SplitOffsetsMapI->second;
4814
4815 assert(Offsets.S == S && "Found a mismatched slice!");
4817 "Cannot have an empty set of splits on the second partition!");
4819 P.beginOffset() - Offsets.S->beginOffset() &&
4820 "Previous split does not end where this one begins!");
4821
4822
4823
4824 if (S->endOffset() > P.endOffset())
4825 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4826 }
4827 }
4828
4829
4830
4831
4832
4833 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4834
4835
4837
4838
4839 if (UnsplittableLoads.count(LI))
4840 return true;
4841
4842 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4843 if (LoadOffsetsI == SplitOffsetsMap.end())
4844 return false;
4845 auto &LoadOffsets = LoadOffsetsI->second;
4846
4847
4848 auto &StoreOffsets = SplitOffsetsMap[SI];
4849
4850
4851
4852
4853 if (LoadOffsets.Splits == StoreOffsets.Splits)
4854 return false;
4855
4856 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4857 << " " << *LI << "\n"
4858 << " " << *SI << "\n");
4859
4860
4861
4862
4863
4864 UnsplittableLoads.insert(LI);
4865 return true;
4866 });
4867
4868
4869
4870
4871 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4873 return UnsplittableLoads.count(LI);
4874 });
4875
4876
4877 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4878 return UnsplittableLoads.count(LI);
4879 });
4880
4881
4882
4883 if (Loads.empty() && Stores.empty())
4884 return false;
4885
4886
4887
4888 IRBuilderTy IRB(&AI);
4889
4890
4892
4893
4894
4895 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4906 std::vector<LoadInst *> SplitLoads;
4907 const DataLayout &DL = AI.getDataLayout();
4908 for (LoadInst *LI : Loads) {
4909 SplitLoads.clear();
4910
4911 auto &Offsets = SplitOffsetsMap[LI];
4912 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4914 "Load must have type size equal to store size");
4916 "Load must be >= slice size");
4917
4918 uint64_t BaseOffset = Offsets.S->beginOffset();
4919 assert(BaseOffset + SliceSize > BaseOffset &&
4920 "Cannot represent alloca access size using 64-bit integers!");
4921
4923 IRB.SetInsertPoint(LI);
4924
4925 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4926
4927 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4928 int Idx = 0, Size = Offsets.Splits.size();
4929 for (;;) {
4930 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4933 LoadInst *PLoad = IRB.CreateAlignedLoad(
4934 PartTy,
4936 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4937 PartPtrTy, BasePtr->getName() + "."),
4939 false, LI->getName());
4940 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4941 LLVMContext::MD_access_group});
4942
4943
4944
4945 SplitLoads.push_back(PLoad);
4946
4947
4949 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4951 false));
4952 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4953 << ", " << NewSlices.back().endOffset()
4954 << "): " << *PLoad << "\n");
4955
4956
4957 if (Idx >= Size)
4958 break;
4959
4960
4961 PartOffset = Offsets.Splits[Idx];
4962 ++Idx;
4963 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4964 }
4965
4966
4967
4968
4969 bool DeferredStores = false;
4970 for (User *LU : LI->users()) {
4972 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4973 DeferredStores = true;
4974 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
4975 << "\n");
4976 continue;
4977 }
4978
4979 Value *StoreBasePtr = SI->getPointerOperand();
4980 IRB.SetInsertPoint(SI);
4981 AAMDNodes AATags = SI->getAAMetadata();
4982
4983 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4984
4985 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4986 LoadInst *PLoad = SplitLoads[Idx];
4987 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4988 auto *PartPtrTy = SI->getPointerOperandType();
4989
4990 auto AS = SI->getPointerAddressSpace();
4991 StoreInst *PStore = IRB.CreateAlignedStore(
4992 PLoad,
4994 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4995 PartPtrTy, StoreBasePtr->getName() + "."),
4997 false);
4998 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4999 LLVMContext::MD_access_group,
5000 LLVMContext::MD_DIAssignID});
5001
5002 if (AATags)
5005 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
5006 }
5007
5008
5009
5010
5011
5013 ResplitPromotableAllocas.insert(OtherAI);
5014 Worklist.insert(OtherAI);
5017 Worklist.insert(OtherAI);
5018 }
5019
5020
5021 DeadInsts.push_back(SI);
5022 }
5023
5024
5025 if (DeferredStores)
5026 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5027
5028
5029 DeadInsts.push_back(LI);
5031 }
5032
5033
5034
5035
5036
5037
5038 for (StoreInst *SI : Stores) {
5042 uint64_t StoreSize = Ty->getBitWidth() / 8;
5043 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5044
5045 auto &Offsets = SplitOffsetsMap[SI];
5047 "Slice size should always match load size exactly!");
5048 uint64_t BaseOffset = Offsets.S->beginOffset();
5049 assert(BaseOffset + StoreSize > BaseOffset &&
5050 "Cannot represent alloca access size using 64-bit integers!");
5051
5054
5055 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
5056
5057
5058 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5059 std::vector<LoadInst *> *SplitLoads = nullptr;
5060 if (SplitLoadsMapI != SplitLoadsMap.end()) {
5061 SplitLoads = &SplitLoadsMapI->second;
5062 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
5063 "Too few split loads for the number of splits in the store!");
5064 } else {
5066 }
5067
5068 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5069 int Idx = 0, Size = Offsets.Splits.size();
5070 for (;;) {
5071 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5073 auto *StorePartPtrTy = SI->getPointerOperandType();
5074
5075
5076 LoadInst *PLoad;
5077 if (SplitLoads) {
5078 PLoad = (*SplitLoads)[Idx];
5079 } else {
5080 IRB.SetInsertPoint(LI);
5082 PLoad = IRB.CreateAlignedLoad(
5083 PartTy,
5085 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5086 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5088 false, LI->getName());
5089 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5090 LLVMContext::MD_access_group});
5091 }
5092
5093
5094 IRB.SetInsertPoint(SI);
5095 auto AS = SI->getPointerAddressSpace();
5096 StoreInst *PStore = IRB.CreateAlignedStore(
5097 PLoad,
5099 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5100 StorePartPtrTy, StoreBasePtr->getName() + "."),
5102 false);
5103 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5104 LLVMContext::MD_access_group});
5105
5106
5108 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5110 false));
5111 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5112 << ", " << NewSlices.back().endOffset()
5113 << "): " << *PStore << "\n");
5114 if (!SplitLoads) {
5115 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5116 }
5117
5118
5119 if (Idx >= Size)
5120 break;
5121
5122
5123 PartOffset = Offsets.Splits[Idx];
5124 ++Idx;
5125 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5126 }
5127
5128
5129
5130
5131
5132
5133 if (!SplitLoads) {
5135 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5136 ResplitPromotableAllocas.insert(OtherAI);
5137 Worklist.insert(OtherAI);
5140 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5141 Worklist.insert(OtherAI);
5142 }
5143 }
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5155 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5156 DeadInsts.push_back(LI);
5157 }
5158 DeadInsts.push_back(SI);
5160 }
5161
5162
5163 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5164
5165
5166
5167 AS.insert(NewSlices);
5168
5170#ifndef NDEBUG
5171 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5173#endif
5174
5175
5176
5177 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5178
5179 return true;
5180}
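// Illustrative effect (offsets hypothetical): if an i64 load/store pair
// straddles two partitions split at byte 4, the pre-splitting above yields
//   %ld.0 = load i32, ptr %src.0
//   %ld.4 = load i32, ptr %src.4
//   store i32 %ld.0, ptr %dst.0
//   store i32 %ld.4, ptr %dst.4
// (the %src.N/%dst.N pointers being formed by byte-offset GEPs), so every
// partition afterwards sees only whole-partition accesses.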
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5193 Partition &P) {
5194
5195
5196
5197 Type *SliceTy = nullptr;
5198 const DataLayout &DL = AI.getDataLayout();
5199 unsigned VScale = AI.getFunction()->getVScaleValue();
5200
5201 std::pair<Type *, IntegerType *> CommonUseTy =
5203
5204 if (CommonUseTy.first) {
5205 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
5207 SliceTy = CommonUseTy.first;
5208 }
5209
5210 if (!SliceTy)
5212 P.beginOffset(), P.size()))
5213 SliceTy = TypePartitionTy;
5214
5215
5216 if (!SliceTy && CommonUseTy.second)
5217 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())
5218 SliceTy = CommonUseTy.second;
5219 if ((!SliceTy || (SliceTy->isArrayTy() &&
5221 DL.isLegalInteger(P.size() * 8)) {
5222 SliceTy = Type::getIntNTy(*C, P.size() * 8);
5223 }
5224
5225 if (!SliceTy)
5226 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
5227 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
5228
5230
5233 if (VecTy)
5234 SliceTy = VecTy;
5235
5236
5237
5238
5239
5240
5241
5242 AllocaInst *NewAI;
5243 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5244 NewAI = &AI;
5245
5246
5247
5248 } else {
5249
5251
5252
5253 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
5254 NewAI = new AllocaInst(
5255 SliceTy, AI.getAddressSpace(), nullptr,
5256 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
5257 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5258 AI.getIterator());
5259
5261 ++NumNewAllocas;
5262 }
5263
5264 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5265 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5266
5267
5268
5269
5270 unsigned PPWOldSize = PostPromotionWorklist.size();
5271 unsigned NumUses = 0;
5272 SmallSetVector<PHINode *, 8> PHIUsers;
5273 SmallSetVector<SelectInst *, 8> SelectUsers;
5274
5275 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5276 P.endOffset(), IsIntegerPromotable, VecTy,
5277 PHIUsers, SelectUsers);
5278 bool Promotable = true;
5279
5280 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5281 NumUses += DeletedValues->size() + 1;
5282 for (Value *V : *DeletedValues)
5283 DeadInsts.push_back(V);
5284 } else {
5285 for (Slice *S : P.splitSliceTails()) {
5286 Promotable &= Rewriter.visit(S);
5287 ++NumUses;
5288 }
5289 for (Slice &S : P) {
5290 Promotable &= Rewriter.visit(&S);
5291 ++NumUses;
5292 }
5293 }
5294
5295 NumAllocaPartitionUses += NumUses;
5296 MaxUsesPerAllocaPartition.updateMax(NumUses);
5297
5298
5299
5300 for (PHINode *PHI : PHIUsers)
5302 Promotable = false;
5303 PHIUsers.clear();
5304 SelectUsers.clear();
5305 break;
5306 }
5307
5309 NewSelectsToRewrite;
5310 NewSelectsToRewrite.reserve(SelectUsers.size());
5311 for (SelectInst *Sel : SelectUsers) {
5312 std::optional<RewriteableMemOps> Ops =
5313 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5314 if (!Ops) {
5315 Promotable = false;
5316 PHIUsers.clear();
5317 SelectUsers.clear();
5318 NewSelectsToRewrite.clear();
5319 break;
5320 }
5321 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5322 }
5323
5324 if (Promotable) {
5325 for (Use *U : AS.getDeadUsesIfPromotable()) {
5327 Value::dropDroppableUse(*U);
5328 if (OldInst)
5330 DeadInsts.push_back(OldInst);
5331 }
5332 if (PHIUsers.empty() && SelectUsers.empty()) {
5333
5334 PromotableAllocas.insert(NewAI);
5335 } else {
5336
5337
5338
5339 SpeculatablePHIs.insert_range(PHIUsers);
5340 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5341 NewSelectsToRewrite.size());
5343 std::make_move_iterator(NewSelectsToRewrite.begin()),
5344 std::make_move_iterator(NewSelectsToRewrite.end())))
5345 SelectsToRewrite.insert(std::move(KV));
5346 Worklist.insert(NewAI);
5347 }
5348 } else {
5349
5350 while (PostPromotionWorklist.size() > PPWOldSize)
5351 PostPromotionWorklist.pop_back();
5352
5353
5354
5355 if (NewAI == &AI)
5356 return nullptr;
5357
5358
5359
5360
5361 Worklist.insert(NewAI);
5362 }
5363
5364 return NewAI;
5365}
5366
5367
5368
5374
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5408 int64_t BitExtractOffset) {
5410 bool HasFragment = false;
5411 bool HasBitExtract = false;
5412
5415 HasFragment = true;
5416 continue;
5417 }
5420 HasBitExtract = true;
5421 int64_t ExtractOffsetInBits = Op.getArg(0);
5422 int64_t ExtractSizeInBits = Op.getArg(1);
5423
5424
5425
5426
5427
5429 return nullptr;
5430
5431 assert(BitExtractOffset <= 0);
5432 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5433
5434
5435
5436
5437
5438 if (AdjustedOffset < 0)
5439 return nullptr;
5440
5441 Ops.push_back(Op.getOp());
5442 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5443 Ops.push_back(ExtractSizeInBits);
5444 continue;
5445 }
5447 }
5448
5449
5450
5451 if (HasFragment && HasBitExtract)
5452 return nullptr;
5453
5454 if (!HasBitExtract) {
5458 }
5460}
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470static void
5473 std::optionalDIExpression::FragmentInfo NewFragment,
5474 int64_t BitExtractAdjustment) {
5475 (void)DIB;
5476
5477
5478
5479
5482 if (NewFragment)
5484 BitExtractAdjustment);
5485 if (!NewFragmentExpr)
5486 return;
5487
5491 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5493 return;
5494 }
5495
5499
5500
5501
5504 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5506 return;
5507 }
5508
5509
5510 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5511 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5513 }
5514
5516 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5518 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5519 (void)NewAssign;
5520}
5521
5522
5523
5524bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5525 if (AS.begin() == AS.end())
5526 return false;
5527
5528 unsigned NumPartitions = 0;
5530 const DataLayout &DL = AI.getModule()->getDataLayout();
5531
5532
5533 Changed |= presplitLoadsAndStores(AI, AS);
5534
5535
5536
5537
5538
5539
5540
5541 bool IsSorted = true;
5542
5543 uint64_t AllocaSize =
5544 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5545 const uint64_t MaxBitVectorSize = 1024;
5546 if (AllocaSize <= MaxBitVectorSize) {
5547
5548
5549 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5550 for (Slice &S : AS)
5551 for (unsigned O = S.beginOffset() + 1;
5552 O < S.endOffset() && O < AllocaSize; O++)
5553 SplittableOffset.reset(O);
5554
5555 for (Slice &S : AS) {
5556 if (!S.isSplittable())
5557 continue;
5558
5559 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5560 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5561 continue;
5562
5565 S.makeUnsplittable();
5566 IsSorted = false;
5567 }
5568 }
5569 } else {
5570
5571
5572 for (Slice &S : AS) {
5573 if (!S.isSplittable())
5574 continue;
5575
5576 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5577 continue;
5578
5581 S.makeUnsplittable();
5582 IsSorted = false;
5583 }
5584 }
5585 }
5586
5587 if (!IsSorted)
5589
5590
5591
5592 struct Fragment {
5593 AllocaInst *Alloca;
5595 uint64_t Size;
5596 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5598 };
5600
5601
5602 for (auto &P : AS.partitions()) {
5603 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5604 Changed = true;
5605 if (NewAI != &AI) {
5606 uint64_t SizeOfByte = 8;
5607 uint64_t AllocaSize =
5608 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5609
5610 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5611 Fragments.push_back(
5612 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5613 }
5614 }
5615 ++NumPartitions;
5616 }
5617
5618 NumAllocaPartitions += NumPartitions;
5619 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5620
5621
5622
5623 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5624
5626 return;
5627
5628 const Value *DbgPtr = DbgVariable->getAddress();
5630 DbgVariable->getFragmentOrEntireVariable();
5631
5632
5633 int64_t CurrentExprOffsetInBytes = 0;
5634 SmallVector<uint64_t> PostOffsetOps;
5636 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5637 return;
5638
5639
5640 int64_t ExtractOffsetInBits = 0;
5644 ExtractOffsetInBits = Op.getArg(0);
5645 break;
5646 }
5647 }
5648
5649 DIBuilder DIB(*AI.getModule(), false);
5650 for (auto Fragment : Fragments) {
5651 int64_t OffsetFromLocationInBits;
5652 std::optionalDIExpression::FragmentInfo NewDbgFragment;
5653
5654
5655
5657 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5658 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5659 NewDbgFragment, OffsetFromLocationInBits))
5660 continue;
5661
5662
5663
5664
5665 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5666 continue;
5667
5668
5669
5670 if (!NewDbgFragment)
5671 NewDbgFragment = DbgVariable->getFragment();
5672
5673
5674
5675 int64_t OffestFromNewAllocaInBits =
5676 OffsetFromLocationInBits - ExtractOffsetInBits;
5677
5678
5679 int64_t BitExtractOffset =
5680 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5681
5682
5683
5684
5685 OffestFromNewAllocaInBits =
5686 std::max(int64_t(0), OffestFromNewAllocaInBits);
5687
5688
5689
5690
5691
5692 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5693 if (OffestFromNewAllocaInBits > 0) {
5694 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5696 }
5697
5698
5699
5700 auto RemoveOne = [DbgVariable](auto *OldDII) {
5701 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5702 return LHS->getVariable() == RHS->getVariable() &&
5703 LHS->getDebugLoc()->getInlinedAt() ==
5704 RHS->getDebugLoc()->getInlinedAt();
5705 };
5706 if (SameVariableFragment(OldDII, DbgVariable))
5707 OldDII->eraseFromParent();
5708 };
5711 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5712 NewDbgFragment, BitExtractOffset);
5713 }
5714 };
5715
5716
5717
5721
5723}
5724
5725
5726void SROA::clobberUse(Use &U) {
5728
5730
5731
5732
5733
5736 DeadInsts.push_back(OldI);
5737 }
5738}
5739
5740
5742public:
5749
5753
5754private:
5755 Type *ZeroType;
5756};
5757
5758bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5759
5760
5761
5762
5763 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5764 bool AllSameAndValid = true;
5765 Type *PartitionType = nullptr;
5766 SmallVector<Instruction *, 4> Insts;
5767 uint64_t BeginOffset = 0;
5768 uint64_t EndOffset = 0;
5769
5770 auto Flush = [&]() {
5771 if (AllSameAndValid && !Insts.empty()) {
5772 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5773 << EndOffset << ")\n");
5774 SmallVector<PHINode *, 4> NewPHIs;
5775 SSAUpdater SSA(&NewPHIs);
5777 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5778 Promoter.run(Insts);
5779 }
5780 AllSameAndValid = true;
5781 PartitionType = nullptr;
5782 Insts.clear();
5783 };
5784
5785 for (Slice &S : AS) {
5789 dbgs() << "Ignoring slice: ";
5790 AS.print(dbgs(), &S);
5791 });
5792 continue;
5793 }
5794 if (S.beginOffset() >= EndOffset) {
5795 Flush();
5796 BeginOffset = S.beginOffset();
5797 EndOffset = S.endOffset();
5798 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5799 if (AllSameAndValid) {
5801 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5802 << EndOffset << ")";
5803 AS.print(dbgs(), &S);
5804 });
5805 AllSameAndValid = false;
5806 }
5807 EndOffset = std::max(EndOffset, S.endOffset());
5808 continue;
5809 }
5810
5811 Instruction *User = cast<Instruction>(S.getUse()->getUser());
5812 if (auto *LI = dyn_cast<LoadInst>(User)) {
5813 Type *UserTy = LI->getType();
5814 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5815 AllSameAndValid = false;
5816 PartitionType = UserTy;
5817 Insts.push_back(User);
5818 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5819 Type *UserTy = SI->getValueOperand()->getType();
5820 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5821 AllSameAndValid = false;
5822 PartitionType = UserTy;
5823 Insts.push_back(User);
5824 } else {
5825 AllSameAndValid = false;
5826 }
5827 }
5828
5829 Flush();
5830 return true;
5831}
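// Illustrative effect (not from a test case): for an alloca that escapes
// only to a read-only callee,
//   store i32 %x, ptr %a
//   call void @observe(ptr readonly %a)
//   %y = load i32, ptr %a
// the load is rewired to use %x directly via SSAUpdater, while the store
// and the call stay in place so the callee still sees up-to-date memory.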
5832
5833
5834
5835
5836
5837
5838 std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5839 SROA::runOnAlloca(AllocaInst &AI) {
5840 bool Changed = false;
5841 bool CFGChanged = false;
5842
5843 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5844 ++NumAllocasAnalyzed;
5845
5846
5847 if (AI.use_empty()) {
5848 AI.eraseFromParent();
5849 Changed = true;
5850 return {Changed, CFGChanged};
5851 }
5852 const DataLayout &DL = AI.getDataLayout();
5853
5854
5855 auto *AT = AI.getAllocatedType();
5856 TypeSize Size = DL.getTypeAllocSize(AT);
5857 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5858 Size.getFixedValue() == 0)
5859 return {Changed, CFGChanged};
5860
5861
5862
5863 IRBuilderTy IRB(&AI);
5864 AggLoadStoreRewriter AggRewriter(DL, IRB);
5865 Changed |= AggRewriter.rewrite(AI);
5866
5867
5868 AllocaSlices AS(DL, AI);
5870 if (AS.isEscaped())
5871 return {Changed, CFGChanged};
5872
5873 if (AS.isEscapedReadOnly()) {
5874 Changed |= propagateStoredValuesToLoads(AI, AS);
5875 return {Changed, CFGChanged};
5876 }
5877
5878
5879 for (Instruction *DeadUser : AS.getDeadUsers()) {
5880
5881 for (Use &DeadOp : DeadUser->operands())
5882 clobberUse(DeadOp);
5883
5884
5885 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5886
5887
5888 DeadInsts.push_back(DeadUser);
5890 }
5891 for (Use *DeadOp : AS.getDeadOperands()) {
5892 clobberUse(*DeadOp);
5894 }
5895
5896
5897 if (AS.begin() == AS.end())
5898 return {Changed, CFGChanged};
5899
5900 Changed |= splitAlloca(AI, AS);
5901
5903 while (!SpeculatablePHIs.empty())
5905
5907 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5908 while (!RemainingSelectsToRewrite.empty()) {
5909 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5910 CFGChanged |=
5912 }
5913
5914 return {Changed, CFGChanged};
5915}
5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926bool SROA::deleteDeadInstructions(
5927 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5929 while (!DeadInsts.empty()) {
5930 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
5931 if (!I)
5932 continue;
5933 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
5934
5935
5936
5937
5939 DeletedAllocas.insert(AI);
5941 OldDII->eraseFromParent();
5942 }
5943
5946
5947 for (Use &Operand : I->operands())
5949
5950 Operand = nullptr;
5952 DeadInsts.push_back(U);
5953 }
5954
5955 ++NumDeleted;
5956 I->eraseFromParent();
5958 }
5960}
5961
5962
5963
5964
5965
5966bool SROA::promoteAllocas() {
5967 if (PromotableAllocas.empty())
5968 return false;
5969
5971 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
5972 } else {
5973 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
5974 NumPromoted += PromotableAllocas.size();
5975 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5976 }
5977
5978 PromotableAllocas.clear();
5979 return true;
5980}
5981
5982 std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
5983 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
5984
5985 const DataLayout &DL = F.getDataLayout();
5986 BasicBlock &EntryBB = F.getEntryBlock();
5987
5988 for (Instruction &I : EntryBB) {
5989 if (auto *AI = dyn_cast<AllocaInst>(&I)) {
5990 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
5991 isAllocaPromotable(AI))
5992 PromotableAllocas.insert(AI);
5993 else
5994 Worklist.insert(AI);
5995 }
5996 }
5997
5998 bool Changed = false;
5999 bool CFGChanged = false;
6000
6001
6002 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6003
6004 do {
6005 while (!Worklist.empty()) {
6006 auto [IterationChanged, IterationCFGChanged] =
6007 runOnAlloca(*Worklist.pop_back_val());
6008 Changed |= IterationChanged;
6009 CFGChanged |= IterationCFGChanged;
6010
6011 Changed |= deleteDeadInstructions(DeletedAllocas);
6012
6013
6014
6015 if (!DeletedAllocas.empty()) {
6016 Worklist.set_subtract(DeletedAllocas);
6017 PostPromotionWorklist.set_subtract(DeletedAllocas);
6018 PromotableAllocas.set_subtract(DeletedAllocas);
6019 DeletedAllocas.clear();
6020 }
6021 }
6022
6023 Changed |= promoteAllocas();
6024
6025 Worklist = PostPromotionWorklist;
6026 PostPromotionWorklist.clear();
6027 } while (!Worklist.empty());
6028
6029 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6031 "Should not have modified the CFG when told to preserve it.");
6032
6033 if (isAssignmentTrackingEnabled(*F.getParent())) {
6034 for (auto &BB : F) {
6035 RemoveRedundantDbgInstrs(&BB);
6036 }
6037 }
6038
6039 return {Changed, CFGChanged};
6040}
6041
6045 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6046 auto [Changed, CFGChanged] =
6047 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6048 if (!Changed)
6049 return PreservedAnalyses::all();
6050 PreservedAnalyses PA;
6051 if (!CFGChanged)
6052 PA.preserveSet<CFGAnalyses>();
6053 PA.preserve<DominatorTreeAnalysis>();
6054 return PA;
6055}
6056
6060 OS, MapClassName2PassName);
6062 : "");
6063}
6064
6066
6067namespace {
6068
6069
6070 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6071 class SROALegacyPass : public FunctionPass {
6072 SROAOptions PreserveCFG;
6073public:
6074 static char ID;
6075
6076 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
6077 : FunctionPass(ID), PreserveCFG(PreserveCFG) {
6078 initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
6079 }
6080
6081 bool runOnFunction(Function &F) override {
6082 if (skipFunction(F))
6083 return false;
6084
6085 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6086 AssumptionCache &AC =
6087 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6088 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6089 auto [Changed, _] =
6090 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6091 return Changed;
6092 }
6093
6094 void getAnalysisUsage(AnalysisUsage &AU) const override {
6095 AU.addRequired<AssumptionCacheTracker>();
6096 AU.addRequired<DominatorTreeWrapperPass>();
6097 AU.addPreserved<GlobalsAAWrapperPass>();
6098 AU.addPreserved<DominatorTreeWrapperPass>();
6099 }
6100
6101 StringRef getPassName() const override { return "SROA"; }
6102};
6103
6104}
6105
6106char SROALegacyPass::ID = 0;
6107
6108 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6109 "Scalar Replacement Of Aggregates", false, false)
6110 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6111 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6112 INITIALIZE_PASS_END(SROALegacyPass, "sroa",
6113 "Scalar Replacement Of Aggregates", false, false)
Definition SROA.cpp:2578
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
Definition SROA.cpp:2111
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
Definition SROA.cpp:1909
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
Definition SROA.cpp:2417
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
Definition SROA.cpp:2611
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
Definition SROA.cpp:1005
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
Definition SROA.cpp:1875
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
Definition SROA.cpp:1808
static Value * foldSelectInst(SelectInst &SI)
Definition SROA.cpp:992
bool isKillAddress(const DbgVariableRecord *DVR)
Definition SROA.cpp:5369
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
Definition SROA.cpp:2633
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
Definition SROA.cpp:2512
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
Definition SROA.cpp:1626
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
Definition SROA.cpp:2291
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
Definition SROA.cpp:323
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
Definition SROA.cpp:1552
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
Definition SROA.cpp:2553
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
Definition SROA.cpp:5471
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
Definition SROA.cpp:1769
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
Definition SROA.cpp:2707
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
Definition SROA.cpp:5375
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
Definition SROA.cpp:4577
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
Definition SROA.cpp:1930
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
Definition SROA.cpp:1707
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
Definition SROA.cpp:2020
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
Definition SROA.cpp:1017
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
Definition SROA.cpp:1033
An iterator over partitions of the alloca's slices.
Definition SROA.cpp:805
bool operator==(const partition_iterator &RHS) const
Definition SROA.cpp:952
friend class AllocaSlices
Definition SROA.cpp:806
partition_iterator & operator++()
Definition SROA.cpp:972
Partition & operator*()
Definition SROA.cpp:977
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition SROA.cpp:6042
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition SROA.cpp:6057
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Definition SROA.cpp:6065
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
element_iterator element_begin() const
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getArrayElementType() const
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
Offsets
Offsets in bytes from the start of the input buffer.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
Definition SROA.cpp:6108
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
A CRTP mix-in to automatically provide informational APIs needed for passes.