LLVM: lib/CodeGen/InterleavedLoadCombinePass.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
42
43#include
44#include
45#include
46
47using namespace llvm;
48
49#define DEBUG_TYPE "interleaved-load-combine"
50
51namespace {
52
53
54STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");
55
56
57static cl::opt DisableInterleavedLoadCombine(
59 cl::desc("Disable combining of interleaved loads"));
60
61struct VectorInfo;
62
63struct InterleavedLoadCombineImpl {
64public:
68 : F(F), DT(DT), MSSA(MSSA),
70
71
72
73 bool run();
74
75private:
76
78
79
81
82
84
85
87
88
90
91
92
93 LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);
94
95
96
97
98 bool combine(std::list &InterleavedLoad,
100
101
102
103 bool findPattern(std::list &Candidates,
104 std::list &InterleavedLoad, unsigned Factor,
106};
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164class Polynomial {
165
166 enum BOps {
167 LShr,
169 SExt,
170 Trunc,
171 };
172
173
174 unsigned ErrorMSBs = (unsigned)-1;
175
176
177 Value *V = nullptr;
178
179
181
182
184
185public:
186 Polynomial(Value *V) : V(V) {
188 if (Ty) {
189 ErrorMSBs = 0;
190 this->V = V;
191 A = APInt(Ty->getBitWidth(), 0);
192 }
193 }
194
// Builds a polynomial that consists of the constant term A only; V keeps its
// in-class default (nullptr). ErrorMSBs defaults to 0.
195 Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
196 : ErrorMSBs(ErrorMSBs), A(A) {}
197
// Same as above, but the constant term is constructed from a bit width and a
// 64-bit value.
198 Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
199 : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {}
200
// Default state: members keep their in-class initializers, i.e. ErrorMSBs is
// the (unsigned)-1 sentinel and V is nullptr.
201 Polynomial() = default;
202
203
// Adds `amt` to ErrorMSBs, saturating at the bit width of A.
// Does nothing when ErrorMSBs holds the (unsigned)-1 sentinel, which is the
// value a default-constructed Polynomial carries.
204 void incErrorMSBs(unsigned amt) {
205 if (ErrorMSBs == (unsigned)-1)
206 return;
207
208 ErrorMSBs += amt;
// Clamp so the counter never exceeds the number of bits in A.
209 if (ErrorMSBs > A.getBitWidth())
210 ErrorMSBs = A.getBitWidth();
211 }
212
213
// Subtracts `amt` from ErrorMSBs, saturating at 0.
// Does nothing when ErrorMSBs holds the (unsigned)-1 sentinel, which is the
// value a default-constructed Polynomial carries.
214 void decErrorMSBs(unsigned amt) {
215 if (ErrorMSBs == (unsigned)-1)
216 return;
217
// Guard the subtraction so the unsigned counter cannot wrap below zero.
218 if (ErrorMSBs > amt)
219 ErrorMSBs -= amt;
220 else
221 ErrorMSBs = 0;
222 }
223
224
225 Polynomial &add(const APInt &C) {
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242 if (C.getBitWidth() != A.getBitWidth()) {
244 return *this;
245 }
246
248 return *this;
249 }
250
251
252 Polynomial &mul(const APInt &C) {
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303 if (C.getBitWidth() != A.getBitWidth()) {
305 return *this;
306 }
307
308
309 if (C.isOne()) {
310 return *this;
311 }
312
313
314 if (C.isZero()) {
315 ErrorMSBs = 0;
316 deleteB();
317 }
318
319
320
321 decErrorMSBs(C.countr_zero());
322
325 return *this;
326 }
327
328
329 Polynomial &lshr(const APInt &C) {
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460 if (C.getBitWidth() != A.getBitWidth()) {
462 return *this;
463 }
464
465 if (C.isZero())
466 return *this;
467
468
469 unsigned shiftAmt = C.getZExtValue();
470 if (shiftAmt >= C.getBitWidth())
471 return mul(APInt(C.getBitWidth(), 0));
472
473
474
475
476
477
478 if (A.countr_zero() < shiftAmt)
479 ErrorMSBs = A.getBitWidth();
480 else
481 incErrorMSBs(shiftAmt);
482
483
484 pushBOperation(LShr, C);
486
487 return *this;
488 }
489
490
491 Polynomial &sextOrTrunc(unsigned n) {
492 if (n < A.getBitWidth()) {
493
494
495 decErrorMSBs(A.getBitWidth() - n);
497 pushBOperation(Trunc, APInt(sizeof(n) * 8, n));
498 }
499 if (n > A.getBitWidth()) {
500
501
502 incErrorMSBs(n - A.getBitWidth());
504 pushBOperation(SExt, APInt(sizeof(n) * 8, n));
505 }
506
507 return *this;
508 }
509
510
// Returns true when this polynomial still refers to a Value, i.e. V is
// non-null. deleteB() resets V to nullptr, after which this returns false.
511 bool isFirstOrder() const { return V != nullptr; }
512
513
// Decides whether `o` can be combined with this polynomial (used by
// operator- before subtracting the constant terms).
// Returns true when both have the same bit width for A and either
//  - neither refers to a Value, or
//  - both refer to the same Value V and carry element-wise equal B sequences.
514 bool isCompatibleTo(const Polynomial &o) const {
515
// Different bit widths of the constant term are never compatible.
516 if (A.getBitWidth() != o.A.getBitWidth())
517 return false;
518
519
// Two pure constants: nothing else needs to match.
520 if (!isFirstOrder() && !o.isFirstOrder())
521 return true;
522
523
// Also rejects the mixed case, where one V is nullptr and the other is not.
524 if (V != o.V)
525 return false;
526
527
// The recorded operation sequences must have the same length ...
528 if (B.size() != o.B.size())
529 return false;
530
// ... and be equal entry by entry; ob walks o.B in lock-step with B.
531 auto *ob = o.B.begin();
532 for (const auto &b : B) {
533 if (b != *ob)
534 return false;
535 ob++;
536 }
537
538 return true;
539 }
540
541
542
// Subtracts `o` from this polynomial.
// If the two are not compatible (see isCompatibleTo) the result is a
// default-constructed Polynomial. Otherwise the result is built through the
// (APInt, unsigned) constructor: it holds only the constant A - o.A and the
// larger of the two ErrorMSBs values; V and B are not carried over.
543 Polynomial operator-(const Polynomial &o) const {
544
545 if (!isCompatibleTo(o))
546 return Polynomial();
547
548
549
550
// std::max also propagates the (unsigned)-1 sentinel if either side has it.
551 return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));
552 }
553
554
556 Polynomial Result(*this);
557 Result.A -= C;
558 return Result;
559 }
560
561
563 Polynomial Result(*this);
564 Result.A += C;
565 return Result;
566 }
567
568
// Returns true only if the difference (*this - o) has ErrorMSBs == 0, refers
// to no Value, and has a zero constant term. An incompatible pair makes
// operator- return a default-constructed Polynomial, whose ErrorMSBs is the
// (unsigned)-1 sentinel, so this returns false in that case.
569 bool isProvenEqualTo(const Polynomial &o) {
570
571 Polynomial r = *this - o;
572 return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
573 }
574
575
577 OS << "[{#ErrBits:" << ErrorMSBs << "} ";
578
579 if (V) {
580 for (auto b : B)
581 OS << "(";
582 OS << "(" << *V << ") ";
583
584 for (auto b : B) {
585 switch (b.first) {
586 case LShr:
587 OS << "LShr ";
588 break;
589 case Mul:
590 OS << "Mul ";
591 break;
592 case SExt:
593 OS << "SExt ";
594 break;
595 case Trunc:
596 OS << "Trunc ";
597 break;
598 }
599
600 OS << b.second << ") ";
601 }
602 }
603
604 OS << "+ " << A << "]";
605 }
606
607private:
// Drops the Value reference and the recorded operation list, leaving only the
// constant term. After this call isFirstOrder() returns false.
608 void deleteB() {
609 V = nullptr;
610 B.clear();
611 }
612
// Appends the pair (Op, C) to B, but only while the polynomial still refers
// to a Value (isFirstOrder()). For a pure constant the call is a no-op.
613 void pushBOperation(const BOps Op, const APInt &C) {
614 if (isFirstOrder()) {
615 B.push_back(std::make_pair(Op, C));
616 return;
617 }
618 }
619};
620
621#ifndef NDEBUG
623 S.print(OS);
624 return OS;
625}
626#endif
627
628
629
630
631
632
633
634
635struct VectorInfo {
636private:
637 VectorInfo(const VectorInfo &c) : VTy(c.VTy) {
639 "Copying VectorInfo is neither implemented nor necessary,");
640 }
641
642public:
643
644 struct ElementInfo {
645
646 Polynomial Ofs;
647
648
649
651
652 ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)
653 : Ofs(Offset), LI(LI) {}
654 };
655
656
658
659
660 Value *PV = nullptr;
661
662
663 std::set<LoadInst *> LIs;
664
665
666 std::set<Instruction *> Is;
667
668
670
671
672 ElementInfo *EI;
673
674
676
679 }
680
681 VectorInfo &operator=(const VectorInfo &other) = delete;
682
// Releases the EI array with delete[].
// NOTE(review): the constructor that allocates EI is on lines elided from this
// listing - confirm it uses new[].
683 virtual ~VectorInfo() { delete[] EI; }
684
// Number of elements of the vector type VTy; used throughout this struct as
// the loop bound when indexing EI.
685 unsigned getDimension() const { return VTy->getNumElements(); }
686
687
688
689
690
691
692
693
694 bool isInterleaved(unsigned Factor, const DataLayout &DL) const {
696 for (unsigned i = 1; i < getDimension(); i++) {
697 if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {
698 return false;
699 }
700 }
701 return true;
702 }
703
704
705
706
707
708
709
710
711
712 static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {
714 if (SVI)
715 return computeFromSVI(SVI, Result, DL);
717 if (LI)
718 return computeFromLI(LI, Result, DL);
720 if (BCI)
721 return computeFromBCI(BCI, Result, DL);
722 return false;
723 }
724
725
726
727
728
729
730
731 static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,
734
735 if ()
736 return false;
737
739 if (!VTy)
740 return false;
741
742
743 if (Result.VTy->getNumElements() % VTy->getNumElements())
744 return false;
745
746 unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();
747 unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());
748 unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());
749
750 if (NewSize * Factor != OldSize)
751 return false;
752
753 VectorInfo Old(VTy);
754 if (!compute(Op, Old, DL))
755 return false;
756
757 for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {
758 for (unsigned j = 0; j < Factor; j++) {
759 Result.EI[i + j] =
760 ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,
761 j == 0 ? Old.EI[i / Factor].LI : nullptr);
762 }
763 }
764
765 Result.BB = Old.BB;
766 Result.PV = Old.PV;
767 Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());
768 Result.Is.insert(Old.Is.begin(), Old.Is.end());
769 Result.Is.insert(BCI);
770 Result.SVI = nullptr;
771
772 return true;
773 }
774
775
776
777
778
779
780
781
782
783
784
785
786 static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
790
791
792 VectorInfo LHS(ArgTy);
794 LHS.BB = nullptr;
795
796
797 VectorInfo RHS(ArgTy);
799 RHS.BB = nullptr;
800
801
802 if (.BB &&
.BB)
803 return false;
804
805 else if (.BB) {
806 Result.BB = RHS.BB;
807 Result.PV = RHS.PV;
808 }
809
810 else if (.BB) {
811 Result.BB = LHS.BB;
812 Result.PV = LHS.PV;
813 }
814
815 else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
816 Result.BB = LHS.BB;
817 Result.PV = LHS.PV;
818 }
819
820 else {
821 return false;
822 }
823
824
825 if (LHS.BB) {
826 Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
827 Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
828 }
829 if (RHS.BB) {
830 Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
831 Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
832 }
833 Result.Is.insert(SVI);
834 Result.SVI = SVI;
835
836 int j = 0;
839 "Invalid ShuffleVectorInst (index out of bounds)");
840
841 if (i < 0)
842 Result.EI[j] = ElementInfo();
844 if (LHS.BB)
845 Result.EI[j] = LHS.EI[i];
846 else
847 Result.EI[j] = ElementInfo();
848 } else {
849 if (RHS.BB)
851 else
852 Result.EI[j] = ElementInfo();
853 }
854 j++;
855 }
856
857 return true;
858 }
859
860
861
862
863
864
865
866
867
868 static bool computeFromLI(LoadInst *LI, VectorInfo &Result,
872
874 return false;
875
877 return false;
878
879 if (.typeSizeEqualsStoreSize(Result.VTy->getElementType()))
880 return false;
881
882
884
886 Result.PV = BasePtr;
887 Result.LIs.insert(LI);
888 Result.Is.insert(LI);
889
890 for (unsigned i = 0; i < Result.getDimension(); i++) {
891 Value *Idx[2] = {
894 };
895 int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, Idx);
896 Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
897 }
898
899 return true;
900 }
901
902
903
904
905
906 static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {
909
910
914 if (C)
916 }
917
919 case Instruction::Add:
920 if ()
921 break;
922
923 computePolynomial(*LHS, Result);
924 Result.add(C->getValue());
925 return;
926
927 case Instruction::LShr:
928 if ()
929 break;
930
931 computePolynomial(*LHS, Result);
932 Result.lshr(C->getValue());
933 return;
934
935 default:
936 break;
937 }
938
939 Result = Polynomial(&BO);
940 }
941
942
943
944
945
946 static void computePolynomial(Value &V, Polynomial &Result) {
948 computePolynomialBinOp(*BO, Result);
949 else
950 Result = Polynomial(&V);
951 }
952
953
954
955
956
957
958
// Computes, for the pointer value Ptr, an offset polynomial (Result) and the
// pointer that offset is relative to (BasePtr). Both are out-parameters.
//
// Outcomes visible in this listing:
//  - PtrTy is null: Result is a default-constructed Polynomial and BasePtr is
//    nullptr.
//  - BitCast: recurse into operand 0.
//  - GEP with an all-constant offset: Result is that constant and BasePtr is
//    the GEP's pointer operand.
//  - GEP where the scan stops exactly on the last operand: Result is the
//    polynomial of that operand, resized to PointerBits, multiplied by the
//    alloc size of the GEP result element type, plus BaseOffset.
//  - Any other GEP: Result is a default-constructed Polynomial and BasePtr is
//    nullptr.
//  - Everything else: BasePtr is Ptr itself and Result is the zero polynomial
//    of pointer-index width.
//
// Fix in this revision: the two "everything else" paths used to build the
// zero polynomial as a temporary and discard it, so Result was left at
// whatever the caller passed in. They now assign it to Result, as every other
// exit path does.
//
// NOTE(review): the remaining parameters (BasePtr, DL) and the definitions of
// PtrTy, CI, GEP, IDX and Indices are on lines elided from this listing.
959 static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,
962
// No pointer type: report "unknown" through a default Polynomial and a null
// base pointer.
964 if (!PtrTy) {
965 Result = Polynomial();
966 BasePtr = nullptr;
967 return;
968 }
// Width used for every constant created below.
969 unsigned PointerBits =
970 DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
971
972
// NOTE(review): the cast test and the switch header are elided here.
976 case Instruction::BitCast:
977 computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);
978 break;
979 default:
980 BasePtr = &Ptr;
// Was a discarded temporary; assign so the caller receives the zero offset.
981 Result = Polynomial(PointerBits, 0);
982 break;
983 }
984 }
985
988
989 APInt BaseOffset(PointerBits, 0);
990
991
// Fully constant GEP: the accumulated offset is the whole answer.
992 if (GEP.accumulateConstantOffset(DL, BaseOffset)) {
993 Result = Polynomial(BaseOffset);
994 BasePtr = GEP.getPointerOperand();
995 return;
996 } else {
997
998
// Scan the index operands, starting at operand 1, and stop at the first one
// for which IDX is null.
999 unsigned idxOperand, e;
1001 for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;
1002 idxOperand++) {
1004 if (!IDX)
1005 break;
1007 }
1008
1009
// Only the case where the scan stopped on the very last operand is handled;
// anything else is reported as "unknown".
1010 if (idxOperand + 1 != e) {
1011 Result = Polynomial();
1012 BasePtr = nullptr;
1013 return;
1014 }
1015
1016
// Polynomial of the operand the scan stopped on.
1017 computePolynomial(*GEP.getOperand(idxOperand), Result);
1018
1019
1020
1021 BaseOffset =
1022 DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);
1023
1024
// Bring the index polynomial to pointer-index width, scale it by the alloc
// size of the result element type, then add BaseOffset.
1025 unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());
1026 Result.sextOrTrunc(PointerBits);
1027 Result.mul(APInt(PointerBits, ResultSize));
1028 Result.add(BaseOffset);
1029 BasePtr = GEP.getPointerOperand();
1030 }
1031 }
1032
1033
1034 else {
1035 BasePtr = &Ptr;
// Was a discarded temporary; assign so the caller receives the zero offset.
1036 Result = Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0);
1037 }
1038 }
1039
1040#ifndef NDEBUG
1042 if (PV)
1043 OS << *PV;
1044 else
1045 OS << "(none)";
1046 OS << " + ";
1047 for (unsigned i = 0; i < getDimension(); i++)
1048 OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;
1049 OS << "]";
1050 }
1051#endif
1052};
1053
1054}
1055
1056bool InterleavedLoadCombineImpl::findPattern(
1057 std::list &Candidates, std::list &InterleavedLoad,
1059 for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
1060 unsigned i;
1061
1062 unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());
1063
1064
1065 std::vector<std::list::iterator> Res(Factor, Candidates.end());
1066
1067 for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
1068 if (C->VTy != C0->VTy)
1069 continue;
1070 if (C->BB != C0->BB)
1071 continue;
1072 if (C->PV != C0->PV)
1073 continue;
1074
1075
1076 for (i = 1; i < Factor; i++) {
1077 if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
1078 Res[i] = C;
1079 }
1080 }
1081
1082 for (i = 1; i < Factor; i++) {
1083 if (Res[i] == Candidates.end())
1084 break;
1085 }
1086 if (i == Factor) {
1087 Res[0] = C0;
1088 break;
1089 }
1090 }
1091
1092 if (Res[0] != Candidates.end()) {
1093
1094 for (unsigned i = 0; i < Factor; i++) {
1095 InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
1096 }
1097
1098 return true;
1099 }
1100 }
1101 return false;
1102}
1103
1104LoadInst *
1105InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
1106 assert(!LIs.empty() && "No load instructions given.");
1107
1108
1109 BasicBlock *BB = (*LIs.begin())->getParent();
1111 *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); });
1113
1115}
1116
1117bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad,
1118 OptimizationRemarkEmitter &ORE) {
1119 LLVM_DEBUG(dbgs() << "Checking interleaved load\n");
1120
1121
1122
1123
1124 LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;
1125
1126
1127 if (!InsertionPoint)
1128 return false;
1129
1130 std::set<LoadInst *> LIs;
1131 std::set<Instruction *> Is;
1132 std::set<Instruction *> SVIs;
1133
1137
1138
1139 unsigned Factor = InterleavedLoad.size();
1140
1141
1142 for (auto &VI : InterleavedLoad) {
1143
1144 LIs.insert(VI.LIs.begin(), VI.LIs.end());
1145
1146
1147
1148
1149 Is.insert(VI.Is.begin(), VI.Is.end());
1150
1151
1152 SVIs.insert(VI.SVI);
1153 }
1154
1155
1156 if (LIs.size() < 2)
1157 return false;
1158
1159
1160
1161
1162 for (const auto &I : Is) {
1163
1165
1166
1167 if (SVIs.find(I) != SVIs.end())
1168 continue;
1169
1170
1171
1172 for (auto *U : I->users()) {
1174 return false;
1175 }
1176 }
1177
1178
1180 return false;
1181
1182
1183
1184 LoadInst *First = findFirstLoad(LIs);
1185
1186
1187
1188
1190 for (auto *LI : LIs) {
1192 if (!MSSA.dominates(MADef, FMA))
1193 return false;
1194 }
1195 assert(!LIs.empty() && "There are no LoadInst to combine");
1196
1197
1198 for (auto &VI : InterleavedLoad) {
1199 if (!DT.dominates(InsertionPoint, VI.SVI))
1200 return false;
1201 }
1202
1203
1204
1206 Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
1207 unsigned ElementsPerSVI =
1209 ->getNumElements();
1211
1214 Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
1216
1218 return false;
1219 }
1220
1221
1223 auto LI = Builder.CreateAlignedLoad(ILTy, Ptr, InsertionPoint->getAlign(),
1224 "interleaved.wide.load");
1225 auto MSSAU = MemorySSAUpdater(&MSSA);
1226 MemoryUse *MSSALoad = cast(MSSAU.createMemoryAccessBefore(
1228 MSSAU.insertUse(MSSALoad, true);
1229
1230
1231 int i = 0;
1232 for (auto &VI : InterleavedLoad) {
1233 SmallVector<int, 4> Mask;
1234 for (unsigned j = 0; j < ElementsPerSVI; j++)
1235 Mask.push_back(i + j * Factor);
1236
1237 Builder.SetInsertPoint(VI.SVI);
1238 auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");
1239 VI.SVI->replaceAllUsesWith(SVI);
1240 i++;
1241 }
1242
1243 NumInterleavedLoadCombine++;
1244 ORE.emit([&]() {
1245 return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)
1246 << "Load interleaved combined with factor "
1247 << ore::NV("Factor", Factor);
1248 });
1249
1250 return true;
1251}
1252
1253bool InterleavedLoadCombineImpl::run() {
1254 OptimizationRemarkEmitter ORE(&F);
1255 bool changed = false;
1257
1258 auto &DL = F.getDataLayout();
1259
1260
1261 for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
1262 std::list Candidates;
1263
1264 for (BasicBlock &BB : F) {
1265 for (Instruction &I : BB) {
1267
1269 continue;
1270
1272
1273 if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
1274 Candidates.pop_back();
1275 continue;
1276 }
1277
1278 if (!Candidates.back().isInterleaved(Factor, DL)) {
1279 Candidates.pop_back();
1280 }
1281 }
1282 }
1283 }
1284
1285 std::list InterleavedLoad;
1286 while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {
1287 if (combine(InterleavedLoad, ORE)) {
1288 changed = true;
1289 } else {
1290
1291
1292 Candidates.splice(Candidates.begin(), InterleavedLoad,
1293 std::next(InterleavedLoad.begin()),
1294 InterleavedLoad.end());
1295 }
1296 InterleavedLoad.clear();
1297 }
1298 }
1299
1300 return changed;
1301}
1302
1303namespace {
1304
1305
1306struct InterleavedLoadCombine : public FunctionPass {
1307 static char ID;
1308
1309 InterleavedLoadCombine() : FunctionPass(ID) {
1311 }
1312
// Human-readable pass name; also printed by the LLVM_DEBUG banner in this
// struct before the implementation is run.
1313 StringRef getPassName() const override {
1314 return "Interleaved Load Combine Pass";
1315 }
1316
1318 if (DisableInterleavedLoadCombine)
1319 return false;
1320
1321 auto *TPC = getAnalysisIfAvailable();
1322 if (!TPC)
1323 return false;
1324
1325 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()
1326 << "\n");
1327
1328 return InterleavedLoadCombineImpl(
1329 F, getAnalysis().getDomTree(),
1330 getAnalysis().getMSSA(),
1331 getAnalysis().getTTI(F),
1332 TPC->getTM())
1333 .run();
1334 }
1335
1336 void getAnalysisUsage(AnalysisUsage &AU) const override {
1338 AU.addRequired();
1339 AU.addRequired();
1340 FunctionPass::getAnalysisUsage(AU);
1341 }
1342
1343private:
1344};
1345}
1346
1347PreservedAnalyses
1349
1353 bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, TTI, *TM).run();
1355}
1356
1357char InterleavedLoadCombine::ID = 0;
1358
1361 "Combine interleaved loads into wide loads and shufflevector instructions",
1362 false, false)
1368 "Combine interleaved loads into wide loads and shufflevector instructions",
1370
1373 auto P = new InterleavedLoadCombine();
1374 return P;
1375}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool runOnFunction(Function &F, bool PostInlining)
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
This class represents a no-op cast from one type to another.
This is the base class for all instructions that perform data casts.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
This is the shared class of boolean and integer constants.
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
Class to represent integer types.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition InterleavedLoadCombinePass.cpp:1348
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Type * getElementType() const
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI void initializeInterleavedLoadCombinePass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
APInt operator+(APInt a, const APInt &b)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createInterleavedLoadCombinePass()
InterleavedLoadCombines Pass - This pass identifies interleaved loads and combines them into wide loa...
Definition InterleavedLoadCombinePass.cpp:1372
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.