LLVM: lib/Transforms/Vectorize/VectorCombine.cpp Source File
1//===------- VectorCombine.cpp - Optimize partial vector operations ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass optimizes scalar/vector interactions using target cost models. The
10// transforms implemented here may not fit in traditional loop-based or SLP
11// vectorization passes.
12//
13//===----------------------------------------------------------------------===//
14
38#include
39#include
40#include
41#include
42
43#define DEBUG_TYPE "vector-combine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumVecLoad, "Number of vector loads formed");
50STATISTIC(NumVecCmp, "Number of vector compares formed");
51STATISTIC(NumVecBO, "Number of vector binops formed");
52STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
53STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
54STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
55STATISTIC(NumScalarCmp, "Number of scalar compares formed");
56STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
57
58static cl::opt<bool> DisableVectorCombine(
59 "disable-vector-combine", cl::init(false), cl::Hidden,
60 cl::desc("Disable all vector combine transforms"));
61
62static cl::opt<bool> DisableBinopExtractShuffle(
63 "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
64 cl::desc("Disable binop extract to shuffle transforms"));
65
66static cl::opt<unsigned> MaxInstrsToScan(
67 "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
68 cl::desc("Max number of instructions to scan for vector combining."));
69
70static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71
72namespace {
73class VectorCombine {
74public:
75 VectorCombine(Function &F, const TargetTransformInfo &TTI,
76 const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
77 const DataLayout *DL, TTI::TargetCostKind CostKind,
78 bool TryEarlyFoldsOnly)
79 : F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
80 DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL, &DT, &AC),
81 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
82
83 bool run();
84
85private:
88 const TargetTransformInfo &TTI;
89 const DominatorTree &DT;
90 AAResults &AA;
91 AssumptionCache &AC;
92 const DataLayout *DL;
93 TTI::TargetCostKind CostKind;
94 const SimplifyQuery SQ;
95
96
97
98 bool TryEarlyFoldsOnly;
99
100 InstructionWorklist Worklist;
101
102
103
105
106
107
108
109 bool vectorizeLoadInsert(Instruction &I);
110 bool widenSubvectorLoad(Instruction &I);
111 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
112 ExtractElementInst *Ext1,
113 unsigned PreferredExtractIndex) const;
114 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
115 const Instruction &I,
116 ExtractElementInst *&ConvertToShuffle,
117 unsigned PreferredExtractIndex);
120 bool foldExtractExtract(Instruction &I);
121 bool foldInsExtFNeg(Instruction &I);
122 bool foldInsExtBinop(Instruction &I);
123 bool foldInsExtVectorToShuffle(Instruction &I);
124 bool foldBitOpOfCastops(Instruction &I);
125 bool foldBitOpOfCastConstant(Instruction &I);
126 bool foldBitcastShuffle(Instruction &I);
127 bool scalarizeOpOrCmp(Instruction &I);
128 bool scalarizeVPIntrinsic(Instruction &I);
129 bool foldExtractedCmps(Instruction &I);
130 bool foldBinopOfReductions(Instruction &I);
131 bool foldSingleElementStore(Instruction &I);
132 bool scalarizeLoad(Instruction &I);
133 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);
134 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);
135 bool scalarizeExtExtract(Instruction &I);
136 bool foldConcatOfBoolMasks(Instruction &I);
137 bool foldPermuteOfBinops(Instruction &I);
138 bool foldShuffleOfBinops(Instruction &I);
139 bool foldShuffleOfSelects(Instruction &I);
140 bool foldShuffleOfCastops(Instruction &I);
141 bool foldShuffleOfShuffles(Instruction &I);
142 bool foldPermuteOfIntrinsic(Instruction &I);
143 bool foldShuffleOfIntrinsics(Instruction &I);
144 bool foldShuffleToIdentity(Instruction &I);
145 bool foldShuffleFromReductions(Instruction &I);
146 bool foldShuffleChainsToReduce(Instruction &I);
147 bool foldCastFromReductions(Instruction &I);
148 bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
149 bool foldInterleaveIntrinsics(Instruction &I);
150 bool shrinkType(Instruction &I);
151 bool shrinkLoadForShuffles(Instruction &I);
152 bool shrinkPhiOfShuffles(Instruction &I);
153
154 void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
155 LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
156 LLVM_DEBUG(dbgs() << "         With: " << New << '\n');
157 Old.replaceAllUsesWith(&New);
158 if (auto *NewI = dyn_cast<Instruction>(&New)) {
159 New.takeName(&Old);
160 Worklist.pushUsersToWorkList(*NewI);
161 Worklist.pushValue(NewI);
162 }
163 if (Erase) {
164 eraseInstruction(Old);
165 } else {
166 Worklist.push(&Old);
167 }
168 }
169
173 Worklist.remove(&I);
174 I.eraseFromParent();
175
176
177
178 SmallPtrSet<Value *, 4> Visited;
183 OpI, nullptr, nullptr, [&](Value *V) {
186 Worklist.remove(I);
187 if (I == NextInst)
188 NextInst = NextInst->getNextNode();
190 }
191 }))
192 continue;
193 Worklist.pushUsersToWorkList(*OpI);
194 Worklist.pushValue(OpI);
195 }
196 }
197 }
198 }
199};
200}
201
202
203
209
210static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
211
212
213
214 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
215 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
216 mustSuppressSpeculation(*Load))
217 return false;
218
219
220
221 Type *ScalarTy = Load->getType()->getScalarType();
222 unsigned ScalarSize = ScalarTy->getPrimitiveSizeInBits();
223 unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
224 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
225 ScalarSize % 8 != 0)
226 return false;
227
228 return true;
229}
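// Illustrative standalone sketch (an editorial aid, not part of this file;
// helper name is hypothetical): the legality test above reduces to
// divisibility checks on bit widths reported by the target.
#include <cassert>

static bool canWidenScalarToVector(unsigned ScalarSizeBits,
                                   unsigned MinVectorSizeBits) {
  // Reject unknown sizes, non-byte-sized scalars, and scalars that do not
  // evenly divide the minimum vector register width.
  return ScalarSizeBits != 0 && MinVectorSizeBits != 0 &&
         MinVectorSizeBits % ScalarSizeBits == 0 && ScalarSizeBits % 8 == 0;
}

int main() {
  assert(canWidenScalarToVector(32, 128));  // i32 widens into <4 x i32>
  assert(!canWidenScalarToVector(1, 128));  // i1 is not byte-sized
}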
230
231bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
232
233
237 return false;
238
239
242 if (!HasExtract)
244
247 return false;
248
252
253
254
255
256
257 Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
259
260 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
261 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
262 unsigned OffsetEltIndex = 0;
263 Align Alignment = Load->getAlign();
264 if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), *DL, Load, &AC,
265 &DT)) {
266
267
268
269
270 unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
271 APInt Offset(OffsetBitWidth, 0);
272 SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
273
274
275
276 if (Offset.isNegative())
277 return false;
278
279
280
281 uint64_t ScalarSizeInBytes = ScalarSize / 8;
282 if (Offset.urem(ScalarSizeInBytes) != 0)
283 return false;
284
285
286 OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
287 if (OffsetEltIndex >= MinVecNumElts)
288 return false;
289
290 if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), *DL, Load, &AC,
291 &DT))
292 return false;
293
294
295
296
298 }
299
300
301
303 Type *LoadTy = Load->getType();
304 unsigned AS = Load->getPointerAddressSpace();
308 OldCost +=
310 true, HasExtract, CostKind);
311
312
315
316
317
318
319
320
321
323 unsigned OutputNumElts = Ty->getNumElements();
324 SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
325 assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
326 Mask[0] = OffsetEltIndex;
327 if (OffsetEltIndex)
330
331
332
333 if (OldCost < NewCost || !NewCost.isValid())
334 return false;
335
336
337
339 Value *CastedPtr =
343
344 replaceValue(I, *VecLd);
345 ++NumVecLoad;
346 return true;
347}
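// Illustrative standalone sketch (editorial, plain integers rather than
// LLVM's APInt): the pointer-offset checks above map a byte offset from the
// widened base onto a vector lane, or reject the transform.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<uint64_t> offsetToEltIndex(int64_t OffsetBytes,
                                                uint64_t ScalarSizeBytes,
                                                uint64_t MinVecNumElts) {
  if (OffsetBytes < 0)
    return std::nullopt;                           // negative offsets rejected
  if (uint64_t(OffsetBytes) % ScalarSizeBytes != 0)
    return std::nullopt;                           // must be a whole lane
  uint64_t Idx = uint64_t(OffsetBytes) / ScalarSizeBytes;
  if (Idx >= MinVecNumElts)
    return std::nullopt;                           // must land inside the vector
  return Idx;
}

int main() {
  assert(offsetToEltIndex(8, 4, 4) == 2u);  // byte 8 of <4 x i32> is lane 2
  assert(!offsetToEltIndex(6, 4, 4));       // misaligned offset
}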
348
349
350
351
352bool VectorCombine::widenSubvectorLoad(Instruction &I) {
353
355 if (!Shuf->isIdentityWithPadding())
356 return false;
357
358
359 unsigned NumOpElts =
361 unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
362 return M >= (int)(NumOpElts);
363 });
364
367 return false;
368
369
370
371
373 Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
375 Align Alignment = Load->getAlign();
377 return false;
378
380 Type *LoadTy = Load->getType();
381 unsigned AS = Load->getPointerAddressSpace();
382
383
384
385
386
389
390
393
394
395
396 if (OldCost < NewCost || !NewCost.isValid())
397 return false;
398
400 Value *CastedPtr =
403 replaceValue(I, *VecLd);
404 ++NumVecLoad;
405 return true;
406}
407
408
409
410ExtractElementInst *VectorCombine::getShuffleExtract(
411 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
412 unsigned PreferredExtractIndex = InvalidIndex) const {
413 auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
414 auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
415 assert(Index0C && Index1C && "Expected constant extract indexes");
416
417 unsigned Index0 = Index0C->getZExtValue();
418 unsigned Index1 = Index1C->getZExtValue();
419
420
421 if (Index0 == Index1)
422 return nullptr;
423
430
431
433 return nullptr;
434
435
436
437
438 if (Cost0 > Cost1)
439 return Ext0;
440 if (Cost1 > Cost0)
441 return Ext1;
442
443
444
445 if (PreferredExtractIndex == Index0)
446 return Ext1;
447 if (PreferredExtractIndex == Index1)
448 return Ext0;
449
450
451 return Index0 > Index1 ? Ext0 : Ext1;
452}
453
454
455
456
457
458
459bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
460 ExtractElementInst *Ext1,
461 const Instruction &I,
462 ExtractElementInst *&ConvertToShuffle,
463 unsigned PreferredExtractIndex) {
464 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
465 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
466 assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
467
468 unsigned Opcode = I.getOpcode();
474
475
477 if (IsBinOp) {
480 } else {
481 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
482 "Expected a compare");
488 }
489
490
491
492 unsigned Ext0Index = Ext0IndexC->getZExtValue();
493 unsigned Ext1Index = Ext1IndexC->getZExtValue();
494
499
500
501
502
503
504
505
506
507 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
508 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
509 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
510
511
512
514 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
515
516
517
518
519 bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
521 OldCost = CheapExtractCost + ScalarOpCost;
522 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
523 } else {
524
525
526 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
527 NewCost = VectorOpCost + CheapExtractCost +
528 !Ext0->hasOneUse() * Extract0Cost +
529 !Ext1->hasOneUse() * Extract1Cost;
530 }
531
532 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
533 if (ConvertToShuffle) {
535 return true;
536
537
538
539
540
541
542
543
545 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
546 PoisonMaskElem);
547 ShuffleMask[BestInsIndex] = BestExtIndex;
549 VecTy, VecTy, ShuffleMask, CostKind, 0,
550 nullptr, {ConvertToShuffle});
551 } else {
553 VecTy, VecTy, {}, CostKind, 0, nullptr,
554 {ConvertToShuffle});
555 }
556 }
557
558
559
560
561 return OldCost < NewCost;
562}
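// Illustrative standalone sketch (editorial, hypothetical names) of the cost
// accounting above: an extra use of an extract keeps it alive, so its cost is
// charged again (a "use tax") when deciding whether the vector op pays off.
#include <algorithm>
#include <cstdio>

struct Costs { int Extract0, Extract1, ScalarOp, VectorOp; };

static bool vectorIsCheaper(const Costs &C, bool Ext0HasOneUse,
                            bool Ext1HasOneUse) {
  int OldCost = C.Extract0 + C.Extract1 + C.ScalarOp;
  int CheapExtract = std::min(C.Extract0, C.Extract1);
  int NewCost = C.VectorOp + CheapExtract +
                !Ext0HasOneUse * C.Extract0 +  // pay again if extract survives
                !Ext1HasOneUse * C.Extract1;
  return NewCost <= OldCost;
}

int main() {
  std::printf("%d\n", vectorIsCheaper({1, 1, 1, 1}, true, true)); // prints 1
}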
563
564
565
568
569
570
573 ShufMask[NewIndex] = OldIndex;
574 return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
575}
576
577
578
579
580
583
586 return nullptr;
587
588
589
593 return nullptr;
594
596 NewIndex, Builder);
597 return Shuf;
598}
599
600
601
602
604 Instruction &I) {
606
607
608
609 ++NumVecCmp;
613}
614
615
616
617
619 Instruction &I) {
621
622
623
624 ++NumVecBO;
626 V1, "foldExtExtBinop");
627
628
629
631 VecBOInst->copyIRFlags(&I);
632
634}
635
636
637bool VectorCombine::foldExtractExtract(Instruction &I) {
638
639
641 return false;
642
647 return false;
648
650 uint64_t C0, C1;
654 return false;
655
656
657
658
659
660
664 if (I.hasOneUse())
667
668 ExtractElementInst *ExtractToChange;
669 if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
670 return false;
671
674
675 if (ExtractToChange) {
676 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
677 Value *NewExtOp =
679 if (!NewExtOp)
680 return false;
681 if (ExtractToChange == Ext0)
682 ExtOp0 = NewExtOp;
683 else
684 ExtOp1 = NewExtOp;
685 }
686
690 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
691 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
692 Worklist.push(Ext0);
693 Worklist.push(Ext1);
694 replaceValue(I, *NewExt);
695 return true;
696}
697
698
699
700bool VectorCombine::foldInsExtFNeg(Instruction &I) {
701
703 uint64_t ExtIdx, InsIdx;
707 return false;
708
709
715 return false;
716
718 auto *DstVecScalarTy = DstVecTy->getScalarType();
720 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
721 return false;
722
723
724
725 unsigned NumDstElts = DstVecTy->getNumElements();
726 unsigned NumSrcElts = SrcVecTy->getNumElements();
727 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
728 return false;
729
730
731
732
733 SmallVector<int> Mask(NumDstElts);
734 std::iota(Mask.begin(), Mask.end(), 0);
735 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
739
740
741
742
745
750
751 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
752
753
754 SmallVector<int> SrcMask;
755 if (NeedLenChg) {
757 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
759 DstVecTy, SrcVecTy, SrcMask, CostKind);
760 }
761
762 LLVM_DEBUG(dbgs() << "Found an insertion of (extract)fneg : " << I
763 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
764 << "\n");
765 if (NewCost > OldCost)
766 return false;
767
768 Value *NewShuf, *LenChgShuf = nullptr;
769
771 if (NeedLenChg) {
772
776 } else {
777
779 }
780
782 replaceValue(I, *NewShuf);
783 return true;
784}
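// Illustrative standalone sketch (editorial) of the blend mask built above:
// identity lanes from operand 0, plus one lane taken from the (negated)
// source vector, which appears as shuffle operand 1.
#include <cstdio>
#include <numeric>
#include <vector>

static std::vector<int> blendMask(unsigned NumDstElts, unsigned InsIdx,
                                  unsigned ExtIdx) {
  std::vector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);               // identity lanes
  Mask[InsIdx] = int(ExtIdx % NumDstElts) + NumDstElts; // lane from operand 1
  return Mask;
}

int main() {
  for (int M : blendMask(4, 1, 1))  // prints: 0 5 2 3
    std::printf("%d ", M);
}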
785
786
787
788bool VectorCombine::foldInsExtBinop(Instruction &I) {
789 BinaryOperator *VecBinOp, *SclBinOp;
794 return false;
795
796
798 if (BinOpcode != SclBinOp->getOpcode())
799 return false;
800
802 if (!ResultTy)
803 return false;
804
805
806
807
819
820 LLVM_DEBUG(dbgs() << "Found an insertion of two binops: " << I
821 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
822 << "\n");
823 if (NewCost > OldCost)
824 return false;
825
830 Value *NewBO = Builder.CreateBinOp(BinOpcode, NewIns0, NewIns1);
831
832
834 NewInst->copyIRFlags(VecBinOp);
835 NewInst->andIRFlags(SclBinOp);
836 }
837
840 replaceValue(I, *NewBO);
841 return true;
842}
843
844
845
846bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
847
849 if (!BinOp || !BinOp->isBitwiseLogicOp())
850 return false;
851
852
855 if (!LHSCast || !RHSCast) {
856 LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
857 return false;
858 }
859
860
862 if (CastOpcode != RHSCast->getOpcode())
863 return false;
864
865
866 switch (CastOpcode) {
867 case Instruction::BitCast:
868 case Instruction::Trunc:
869 case Instruction::SExt:
870 case Instruction::ZExt:
871 break;
872 default:
873 return false;
874 }
875
876 Value *LHSSrc = LHSCast->getOperand(0);
877 Value *RHSSrc = RHSCast->getOperand(0);
878
879
881 return false;
882
883 auto *SrcTy = LHSSrc->getType();
884 auto *DstTy = I.getType();
885
886
887 if (CastOpcode != Instruction::BitCast &&
889 return false;
890
891
892 if (!SrcTy->getScalarType()->isIntegerTy() ||
893 !DstTy->getScalarType()->isIntegerTy())
894 return false;
895
896
897
898
899
900
905
908 LHSCastCost + RHSCastCost;
909
910
913
916 GenericCastCost;
917
918
919 if (!LHSCast->hasOneUse())
920 NewCost += LHSCastCost;
921 if (!RHSCast->hasOneUse())
922 NewCost += RHSCastCost;
923
924 LLVM_DEBUG(dbgs() << "foldBitOpOfCastops: OldCost=" << OldCost
925 << " NewCost=" << NewCost << "\n");
926
927 if (NewCost > OldCost)
928 return false;
929
930
931 Value *NewOp = Builder.CreateBinOp(BinOp->getOpcode(), LHSSrc, RHSSrc,
932 BinOp->getName() + ".inner");
934 NewBinOp->copyIRFlags(BinOp);
935
937
938
940
941
944
945
947
948 replaceValue(I, *Result);
949 return true;
950}
951
952
953
954
955
956
957bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
960
961
963 return false;
964
965
967 if (!LHSCast)
968 return false;
969
971
972
973 switch (CastOpcode) {
974 case Instruction::BitCast:
975 case Instruction::ZExt:
976 case Instruction::SExt:
977 case Instruction::Trunc:
978 break;
979 default:
980 return false;
981 }
982
983 Value *LHSSrc = LHSCast->getOperand(0);
984
985 auto *SrcTy = LHSSrc->getType();
986 auto *DstTy = I.getType();
987
988
989 if (CastOpcode != Instruction::BitCast &&
991 return false;
992
993
994 if (!SrcTy->getScalarType()->isIntegerTy() ||
995 !DstTy->getScalarType()->isIntegerTy())
996 return false;
997
998
999 PreservedCastFlags RHSFlags;
1001 if (!InvC)
1002 return false;
1003
1004
1005
1006
1007
1008
1011
1014
1015
1018
1021 GenericCastCost;
1022
1023
1024 if (!LHSCast->hasOneUse())
1025 NewCost += LHSCastCost;
1026
1027 LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
1028 << " NewCost=" << NewCost << "\n");
1029
1030 if (NewCost > OldCost)
1031 return false;
1032
1033
1035 LHSSrc, InvC, I.getName() + ".inner");
1037 NewBinOp->copyIRFlags(&I);
1038
1040
1041
1043
1044
1045 if (RHSFlags.NNeg)
1047 if (RHSFlags.NUW)
1049 if (RHSFlags.NSW)
1051
1053
1054
1056
1057 replaceValue(I, *Result);
1058 return true;
1059}
1060
1061
1062
1063
1064bool VectorCombine::foldBitcastShuffle(Instruction &I) {
1066 ArrayRef<int> Mask;
1069 return false;
1070
1071
1072
1073
1074
1075
1078 if (!DestTy || !SrcTy)
1079 return false;
1080
1081 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1082 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1083 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1084 return false;
1085
1087
1088
1089
1090 if (!IsUnary) {
1093 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1094 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1095 return false;
1096 }
1097
1098 SmallVector<int, 16> NewMask;
1099 if (DestEltSize <= SrcEltSize) {
1100
1101
1102 assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
1103 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1105 } else {
1106
1107
1108 assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
1109 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1111 return false;
1112 }
1113
1114
1115
1116 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1117 auto *NewShuffleTy =
1119 auto *OldShuffleTy =
1121 unsigned NumOps = IsUnary ? 1 : 2;
1122
1123
1127
1131 TargetTransformInfo::CastContextHint::None,
1136 TargetTransformInfo::CastContextHint::None,
1138
1139 LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
1140 << OldCost << " vs NewCost: " << NewCost << "\n");
1141
1142 if (NewCost > OldCost || !NewCost.isValid())
1143 return false;
1144
1145
1146 ++NumShufOfBitcast;
1150 replaceValue(I, *Shuf);
1151 return true;
1152}
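// Illustrative standalone sketch (editorial; not LLVM's ShuffleVectorInst
// helpers) of the mask rescaling above: narrowing a shuffle mask by a factor,
// so one wide lane becomes `Scale` consecutive narrow lanes.
#include <cstdio>
#include <vector>

static std::vector<int> narrowMask(const std::vector<int> &Mask,
                                   unsigned Scale) {
  std::vector<int> Out;
  Out.reserve(Mask.size() * Scale);
  for (int M : Mask)
    for (unsigned I = 0; I != Scale; ++I)
      Out.push_back(M < 0 ? -1 : M * int(Scale) + int(I)); // -1 = poison lane
  return Out;
}

int main() {
  // A <2 x i64> mask {1,0} viewed as <4 x i32> becomes {2,3,0,1}.
  for (int M : narrowMask({1, 0}, 2))
    std::printf("%d ", M);
}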
1153
1154
1155
1156
1157bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
1159 return false;
1163
1165 return false;
1166
1167
1168
1171 if (!ScalarOp0 || !ScalarOp1)
1172 return false;
1173
1174
1175
1176
1177
1178
1179 auto IsAllTrueMask = [](Value *MaskVal) {
1182 return ConstValue->isAllOnesValue();
1183 return false;
1184 };
1186 return false;
1187
1188
1191 return false;
1192
1193
1194
1196 SmallVector<int> Mask;
1198 Mask.resize(FVTy->getNumElements(), 0);
1203
1204
1207 Args.push_back(V->getType());
1208 IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
1211
1212
1213 std::optional<unsigned> FunctionalOpcode =
1214 VPI.getFunctionalOpcode();
1215 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1216 if (!FunctionalOpcode) {
1218 if (!ScalarIntrID)
1219 return false;
1220 }
1221
1222
1224 if (ScalarIntrID) {
1225 IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1227 } else {
1229 VecTy->getScalarType(), CostKind);
1230 }
1231
1232
1235 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1236
1237 LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
1238 << "\n");
1239 LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
1240 << ", Cost of scalarizing:" << NewCost << "\n");
1241
1242
1243 if (OldCost < NewCost || !NewCost.isValid())
1244 return false;
1245
1246
1249
1250
1251
1252
1253
1254 bool SafeToSpeculate;
1255 if (ScalarIntrID)
1257 .hasAttribute(Attribute::AttrKind::Speculatable);
1258 else
1260 *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
1261 if (!SafeToSpeculate &&
1263 return false;
1264
1265 Value *ScalarVal =
1266 ScalarIntrID
1267 ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
1268 {ScalarOp0, ScalarOp1})
1270 ScalarOp0, ScalarOp1);
1271
1273 return true;
1274}
1275
1276
1277
1278
1279bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
1280 auto *UO = dyn_cast<UnaryOperator>(&I);
1281 auto *BO = dyn_cast<BinaryOperator>(&I);
1282 auto *CI = dyn_cast<CmpInst>(&I);
1283 auto *II = dyn_cast<IntrinsicInst>(&I);
1284 if (!UO && !BO && !CI && !II)
1285 return false;
1286
1287
1288 if (II) {
1290 return false;
1291 for (auto [Idx, Arg] : enumerate(II->args()))
1292 if (Arg->getType() != II->getType() &&
1294 return false;
1295 }
1296
1297
1298
1299
1300
1301 if (CI)
1302 for (User *U : I.users())
1304 return false;
1305
1306
1307
1309 std::optional<uint64_t> Index;
1310
1311 auto Ops = II ? II->args() : I.operands();
1315 uint64_t InsIdx = 0;
1318
1320 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1321 return false;
1322
1323
1324 if (!Index)
1326 else if (InsIdx != *Index)
1327 return false;
1331 OpNum, &TTI)) {
1337 } else {
1338 return false;
1339 }
1340 }
1341
1342
1343 if (!Index.has_value())
1344 return false;
1345
1347 Type *ScalarTy = VecTy->getScalarType();
1348 assert(VecTy->isVectorTy() &&
1351 "Unexpected types for insert element into binop or cmp");
1352
1353 unsigned Opcode = I.getOpcode();
1355 if (CI) {
1361 } else if (UO || BO) {
1364 } else {
1365 IntrinsicCostAttributes ScalarICA(
1366 II->getIntrinsicID(), ScalarTy,
1369 IntrinsicCostAttributes VectorICA(
1370 II->getIntrinsicID(), VecTy,
1373 }
1374
1375
1376
1377 Value *NewVecC = nullptr;
1378 if (CI)
1379 NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1380 else if (UO)
1381 NewVecC =
1382 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1383 else if (BO)
1384 NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1385 else if (II)
1386 NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);
1387
1388 if (!NewVecC)
1389 return false;
1390
1391
1392
1397
1398 for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
1400 II->getIntrinsicID(), Idx, &TTI)))
1401 continue;
1403 Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
1404 OldCost += InsertCost;
1405 NewCost += !Op->hasOneUse() * InsertCost;
1406 }
1407
1408
1409 if (OldCost < NewCost || !NewCost.isValid())
1410 return false;
1411
1412
1413
1414 if (CI)
1415 ++NumScalarCmp;
1416 else if (UO || BO)
1417 ++NumScalarOps;
1418 else
1419 ++NumScalarIntrinsic;
1420
1421
1422 for (auto [OpIdx, Scalar, VecC] : enumerate(ScalarOps, VecCs))
1423 if (!Scalar)
1426
1428 if (CI)
1429 Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1430 else if (UO || BO)
1432 else
1434
1435 Scalar->setName(I.getName() + ".scalar");
1436
1437
1438
1440 ScalarInst->copyIRFlags(&I);
1441
1443 replaceValue(I, *Insert);
1444 return true;
1445}
1446
1447
1448
1449
1450bool VectorCombine::foldExtractedCmps(Instruction &I) {
1452
1453
1454
1455 if (!BI || !I.getType()->isIntegerTy(1))
1456 return false;
1457
1458
1459
1460 Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
1466 return false;
1467
1469 if (!MatchingPred)
1470 return false;
1471
1472
1473
1475 uint64_t Index0, Index1;
1478 return false;
1479
1482 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
1483 if (!ConvertToShuf)
1484 return false;
1485 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1486 "Unknown ExtractElementInst");
1487
1488
1489
1491 unsigned CmpOpcode =
1494 if (!VecTy)
1495 return false;
1496
1504
1506 Ext0Cost + Ext1Cost + CmpCost * 2 +
1508
1509
1510
1511
1512 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1513 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1518 ShufMask[CheapIndex] = ExpensiveIndex;
1523 NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
1524 NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
1525
1526
1527
1528
1529 if (OldCost < NewCost || !NewCost.isValid())
1530 return false;
1531
1532
1535 CmpC[Index0] = C0;
1536 CmpC[Index1] = C1;
1539 Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1540 Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1543 replaceValue(I, *NewExt);
1544 ++NumVecCmpBO;
1545 return true;
1546}
1547
1556 unsigned ReductionOpc =
1560 auto *ExtType = cast(RedOp->getOperand(0)->getType());
1561
1562 CostBeforeReduction =
1563 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1565 CostAfterReduction =
1566 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
1568 return;
1569 }
1570 if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1576 (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1577
1581
1583 TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
1586 TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
1588 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1590
1591 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1592 CostAfterReduction = TTI.getMulAccReductionCost(
1593 IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
1594 return;
1595 }
1596 CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1598}
1599
1600bool VectorCombine::foldBinopOfReductions(Instruction &I) {
1601 Instruction::BinaryOps BinOpOpc = cast<BinaryOperator>(&I)->getOpcode();
1602 Intrinsic::ID ReductionIID = getReductionForBinop(BinOpOpc);
1603 if (BinOpOpc == Instruction::Sub)
1604 ReductionIID = Intrinsic::vector_reduce_add;
1606 return false;
1607
1608 auto checkIntrinsicAndGetItsArgument = [](Value *V,
1609 Intrinsic::ID IID) -> Value * {
1610 auto *II = dyn_cast<IntrinsicInst>(V);
1611 if (!II)
1612 return nullptr;
1613 if (II->getIntrinsicID() == IID && II->hasOneUse())
1614 return II->getArgOperand(0);
1615 return nullptr;
1616 };
1617
1618 Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
1619 if (!V0)
1620 return false;
1621 Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
1622 if (!V1)
1623 return false;
1624
1626 if (V1->getType() != VTy)
1627 return false;
1630 unsigned ReductionOpc =
1632
1642 NewCost =
1643 CostOfRedOperand0 + CostOfRedOperand1 +
1646 if (NewCost >= OldCost || !NewCost.isValid())
1647 return false;
1648
1649 LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I
1650 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
1651 << "\n");
1652 Value *VectorBO;
1653 if (BinOpOpc == Instruction::Or)
1654 VectorBO = Builder.CreateOr(V0, V1, "",
1656 else
1657 VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
1658
1660 replaceValue(I, *Rdx);
1661 return true;
1662}
1663
1664
1668 unsigned NumScanned = 0;
1669 return std::any_of(Begin, End, [&](const Instruction &Instr) {
1670 return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
1671 ++NumScanned > MaxInstrsToScan;
1672 });
1673}
1674
1675namespace {
1676
1677
1678class ScalarizationResult {
1679 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1680
1681 StatusTy Status;
1682 Value *ToFreeze;
1683
1684 ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
1685 : Status(Status), ToFreeze(ToFreeze) {}
1686
1687public:
1688 ScalarizationResult(const ScalarizationResult &Other) = default;
1689 ~ScalarizationResult() {
1690 assert(!ToFreeze && "freeze() not called with ToFreeze being set");
1691 }
1692
1693 static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
1694 static ScalarizationResult safe() { return {StatusTy::Safe}; }
1695 static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
1696 return {StatusTy::SafeWithFreeze, ToFreeze};
1697 }
1698
1699
1700 bool isSafe() const { return Status == StatusTy::Safe; }
1701
1702 bool isUnsafe() const { return Status == StatusTy::Unsafe; }
1703
1704
1705 bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
1706
1707
1708 void discard() {
1709 ToFreeze = nullptr;
1710 Status = StatusTy::Unsafe;
1711 }
1712
1713
1714 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1715 assert(isSafeWithFreeze() &&
1716 "should only be used when freezing is required");
1718 "UserI must be a user of ToFreeze");
1719 IRBuilder<>::InsertPointGuard Guard(Builder);
1720 Builder.SetInsertPoint(&UserI);
1721 Value *Frozen =
1722 Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
1723 for (Use &U : make_early_inc_range(UserI.operands()))
1724 if (U.get() == ToFreeze)
1725 U.set(Frozen);
1726
1727 ToFreeze = nullptr;
1728 }
1729};
1730}
1731
1732
1733
1738
1739
1740
1741 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1743
1745 if (C->getValue().ult(NumElements))
1746 return ScalarizationResult::safe();
1747 return ScalarizationResult::unsafe();
1748 }
1749
1750
1752 return ScalarizationResult::unsafe();
1753
1754 APInt Zero(IntWidth, 0);
1755 APInt MaxElts(IntWidth, NumElements);
1758
1761 true, &AC, CtxI, &DT)))
1762 return ScalarizationResult::safe();
1763 return ScalarizationResult::unsafe();
1764 }
1765
1766
1767
1774 }
1775
1776 if (ValidIndices.contains(IdxRange))
1777 return ScalarizationResult::safeWithFreeze(IdxBase);
1778 return ScalarizationResult::unsafe();
1779}
1780
1781
1782
1783
1784
1790 C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
1791 return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
1792}
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802bool VectorCombine::foldSingleElementStore(Instruction &I) {
1804 return false;
1806 if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
1807 return false;
1808
1809
1810
1811 Instruction *Source;
1812 Value *NewElement;
1813 Value *Idx;
1814 if (!match(SI->getValueOperand(),
1815 m_InsertElt(m_Instruction(Source), m_Value(NewElement),
1816 m_Value(Idx))))
1817 return false;
1818
1821 Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
1822
1823
1824 if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
1825 !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
1826 SrcAddr != SI->getPointerOperand()->stripPointerCasts())
1827 return false;
1828
1829 auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
1830 if (ScalarizableIdx.isUnsafe() ||
1831 isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
1832 MemoryLocation::get(SI), AA))
1833 return false;
1834
1835
1836
1837 Worklist.push(Load);
1838
1839 if (ScalarizableIdx.isSafeWithFreeze())
1840 ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
1841 Value *GEP = Builder.CreateInBoundsGEP(
1842 SI->getValueOperand()->getType(), SI->getPointerOperand(),
1843 {ConstantInt::get(Idx->getType(), 0), Idx});
1844 StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
1845 NSI->copyMetadata(*SI);
1846 Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1847 std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
1848 *DL);
1849 NSI->setAlignment(ScalarOpAlignment);
1850 replaceValue(I, *NSI);
1852 return true;
1853 }
1854
1855 return false;
1856}
1857
1858
1859
1860bool VectorCombine::scalarizeLoad(Instruction &I) {
1863 return false;
1864
1867 if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
1868 return false;
1869
1870 bool AllExtracts = true;
1871 bool AllBitcasts = true;
1873 unsigned NumInstChecked = 0;
1874
1875
1876
1877
1878 for (User *U : LI->users()) {
1880 if (!UI || UI->getParent() != LI->getParent())
1881 return false;
1882
1883
1884
1885 if (UI->use_empty())
1886 return false;
1887
1889 AllExtracts = false;
1891 AllBitcasts = false;
1892
1893
1895 for (Instruction &I :
1896 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1897
1898
1899 if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
1900 return false;
1901 NumInstChecked++;
1902 }
1903 LastCheckedInst = UI;
1904 }
1905 }
1906
1907 if (AllExtracts)
1908 return scalarizeLoadExtract(LI, VecTy, Ptr);
1909 if (AllBitcasts)
1910 return scalarizeLoadBitcast(LI, VecTy, Ptr);
1911 return false;
1912}
1913
1914
1915bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
1918 return false;
1919
1920 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
1922
1923 for (auto &Pair : NeedFreeze)
1924 Pair.second.discard();
1925 });
1926
1931
1932 for (User *U : LI->users()) {
1934
1935 auto ScalarIdx =
1937 if (ScalarIdx.isUnsafe())
1938 return false;
1939 if (ScalarIdx.isSafeWithFreeze()) {
1940 NeedFreeze.try_emplace(UI, ScalarIdx);
1941 ScalarIdx.discard();
1942 }
1943
1945 OriginalCost +=
1947 Index ? Index->getZExtValue() : -1);
1948 ScalarizedCost +=
1952 nullptr, nullptr, CostKind);
1953 }
1954
1955 LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI
1956 << "\n LoadExtractCost: " << OriginalCost
1957 << " vs ScalarizedCost: " << ScalarizedCost << "\n");
1958
1959 if (ScalarizedCost >= OriginalCost)
1960 return false;
1961
1962
1963
1964 Worklist.push(LI);
1965
1966 Type *ElemType = VecTy->getElementType();
1967
1968
1969 for (User *U : LI->users()) {
1971 Value *Idx = EI->getIndexOperand();
1972
1973
1974 auto It = NeedFreeze.find(EI);
1975 if (It != NeedFreeze.end())
1977
1982 Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
1983
1984 Align ScalarOpAlignment =
1986 NewLoad->setAlignment(ScalarOpAlignment);
1987
1989 size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
1992 }
1993
1994 replaceValue(*EI, *NewLoad, false);
1995 }
1996
1997 FailureGuard.release();
1998 return true;
1999}
2000
2001
2002bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2007
2008 Type *TargetScalarType = nullptr;
2009 unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);
2010
2011 for (User *U : LI->users()) {
2013
2014 Type *DestTy = BC->getDestTy();
2016 return false;
2017
2018 unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);
2019 if (DestBitWidth != VecBitWidth)
2020 return false;
2021
2022
2023 if (!TargetScalarType)
2024 TargetScalarType = DestTy;
2025 else if (TargetScalarType != DestTy)
2026 return false;
2027
2028 OriginalCost +=
2031 }
2032
2033 if (!TargetScalarType)
2034 return false;
2035
2036 assert(!LI->user_empty() && "Unexpected load without bitcast users");
2040
2041 LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI
2042 << "\n OriginalCost: " << OriginalCost
2043 << " vs ScalarizedCost: " << ScalarizedCost << "\n");
2044
2045 if (ScalarizedCost >= OriginalCost)
2046 return false;
2047
2048
2049
2050 Worklist.push(LI);
2051
2053 auto *ScalarLoad =
2054 Builder.CreateLoad(TargetScalarType, Ptr, LI->getName() + ".scalar");
2056 ScalarLoad->copyMetadata(*LI);
2057
2058
2059 for (User *U : LI->users()) {
2061 replaceValue(*BC, *ScalarLoad, false);
2062 }
2063
2064 return true;
2065}
2066
2067bool VectorCombine::scalarizeExtExtract(Instruction &I) {
2069 return false;
2071 if (!Ext)
2072 return false;
2073
2074
2075
2076
2078 if (!SrcTy)
2079 return false;
2081
2082 Type *ScalarDstTy = DstTy->getElementType();
2083 if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
2084 return false;
2085
2089 unsigned ExtCnt = 0;
2090 bool ExtLane0 = false;
2091 for (User *U : Ext->users()) {
2092 uint64_t Idx;
2094 return false;
2096 continue;
2097 ExtCnt += 1;
2098 ExtLane0 |= !Idx;
2101 }
2102
2105 Instruction::And, ScalarDstTy, CostKind,
2108 (ExtCnt - ExtLane0) *
2110 Instruction::LShr, ScalarDstTy, CostKind,
2113 if (ScalarCost > VectorCost)
2114 return false;
2115
2116 Value *ScalarV = Ext->getOperand(0);
2118 &DT)) {
2119
2120
2121
2122
2123 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2124 bool AllExtractsTriggerUB = true;
2125 ExtractElementInst *LastExtract = nullptr;
2126 BasicBlock *ExtBB = Ext->getParent();
2127 for (User *U : Ext->users()) {
2130 AllExtractsTriggerUB = false;
2131 break;
2132 }
2134 if (!LastExtract || LastExtract->comesBefore(Extract))
2135 LastExtract = Extract;
2136 }
2137 if (ExtractedLanes.size() != DstTy->getNumElements() ||
2138 !AllExtractsTriggerUB ||
2142 }
2144 ScalarV,
2145 IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
2146 uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
2147 uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
2148 uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
2150 Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
2151 for (User *U : Ext->users()) {
2153 uint64_t Idx =
2155 uint64_t ShiftAmt =
2156 DL->isBigEndian()
2157 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2158 : (Idx * SrcEltSizeInBits);
2161 U->replaceAllUsesWith(And);
2162 }
2163 return true;
2164}
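// Illustrative standalone sketch (editorial) of the shift+mask lowering
// above, little-endian case: four 8-bit lanes packed into one 32-bit
// integer, with lane Idx recovered as (Packed >> (Idx * 8)) & 0xff.
#include <cassert>
#include <cstdint>

static uint32_t extractLane(uint32_t Packed, unsigned Idx) {
  const uint64_t EltBits = 8;
  const uint64_t EltMask = (1ull << EltBits) - 1;
  return (Packed >> (Idx * EltBits)) & EltMask;
}

int main() {
  uint32_t Packed = 0x44332211; // lanes {0x11, 0x22, 0x33, 0x44}
  assert(extractLane(Packed, 0) == 0x11);
  assert(extractLane(Packed, 3) == 0x44);
}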
2165
2166
2167
2168
2169bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
2172 return false;
2173
2174
2175 if (DL->isBigEndian())
2176 return false;
2177
2178
2181 return false;
2182
2183
2184
2186 uint64_t ShAmtX = 0;
2191 return false;
2192
2194 uint64_t ShAmtY = 0;
2199 return false;
2200
2201
2202 if (ShAmtX > ShAmtY) {
2206 }
2207
2208
2209
2210 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2211 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2214 if (!MaskTy || SrcX->getType() != SrcY->getType() ||
2215 !MaskTy->getElementType()->isIntegerTy(1) ||
2216 MaskTy->getNumElements() != ShAmtDiff ||
2217 MaskTy->getNumElements() > (BitWidth / 2))
2218 return false;
2219
2221 auto *ConcatIntTy =
2222 Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
2223 auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
2224
2226 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2227
2228
2231 OldCost +=
2235 OldCost += 2 * TTI.getCastInstrCost(Instruction::BitCast, MaskIntTy, MaskTy,
2237
2240 MaskTy, ConcatMask, CostKind);
2241 NewCost += TTI.getCastInstrCost(Instruction::BitCast, ConcatIntTy, ConcatTy,
2243 if (Ty != ConcatIntTy)
2246 if (ShAmtX > 0)
2248
2249 LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
2250 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2251 << "\n");
2252
2253 if (NewCost > OldCost)
2254 return false;
2255
2256
2257
2260
2262
2263 if (Ty != ConcatIntTy) {
2266 }
2267
2268 if (ShAmtX > 0) {
2271 }
2272
2273 replaceValue(I, *Result);
2274 return true;
2275}
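// Illustrative standalone sketch (editorial) of the fold above, modeled
// directly on integers: two 8-bit bool masks zero-extended and shifted into
// a 16-bit value equal one bitcast of the concatenated <16 x i1> mask.
#include <cassert>
#include <cstdint>

static uint16_t concatMasks(uint8_t Lo, uint8_t Hi) {
  return uint16_t(Lo) | uint16_t(uint16_t(Hi) << 8);
}

int main() { assert(concatMasks(0x0f, 0xf0) == 0xf00f); }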
2276
2277
2278
2279bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
2280 BinaryOperator *BinOp;
2281 ArrayRef<int> OuterMask;
2284 return false;
2285
2286
2288 return false;
2289
2290 Value *Op00, *Op01, *Op10, *Op11;
2291 ArrayRef<int> Mask0, Mask1;
2292 bool Match0 =
2295 bool Match1 =
2298 if (!Match0 && !Match1)
2299 return false;
2300
2301 Op00 = Match0 ? Op00 : BinOp->getOperand(0);
2302 Op01 = Match0 ? Op01 : BinOp->getOperand(0);
2303 Op10 = Match1 ? Op10 : BinOp->getOperand(1);
2304 Op11 = Match1 ? Op11 : BinOp->getOperand(1);
2305
2311 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2312 return false;
2313
2314 unsigned NumSrcElts = BinOpTy->getNumElements();
2315
2316
2317
2319 any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
2320 return false;
2321
2322
2323 SmallVector<int> NewMask0, NewMask1;
2324 for (int M : OuterMask) {
2325 if (M < 0 || M >= (int)NumSrcElts) {
2328 } else {
2329 NewMask0.push_back(Match0 ? Mask0[M] : M);
2330 NewMask1.push_back(Match1 ? Mask1[M] : M);
2331 }
2332 }
2333
2334 unsigned NumOpElts = Op0Ty->getNumElements();
2335 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2336 all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
2338 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2339 all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
2341
2342
2346 BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
2347 if (Match0)
2351 if (Match1)
2355
2358
2359 if (!IsIdentity0)
2360 NewCost +=
2362 Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
2363 if (!IsIdentity1)
2364 NewCost +=
2366 Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
2367
2368 LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
2369 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2370 << "\n");
2371
2372
2373 if (NewCost > OldCost)
2374 return false;
2375
2381
2382
2384 NewInst->copyIRFlags(BinOp);
2385
2388 replaceValue(I, *NewBO);
2389 return true;
2390}
2391
2392
2393
2394bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
2395 ArrayRef<int> OldMask;
2399 return false;
2400
2401
2402 if (LHS->getOpcode() != RHS->getOpcode())
2403 return false;
2404
2406 bool IsCommutative = false;
2412
2414 return false;
2415 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2420 } else
2421 return false;
2422
2426 if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
2427 return false;
2428
2429 unsigned NumSrcElts = BinOpTy->getNumElements();
2430
2431
2432 if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
2434
2435 auto ConvertToUnary = [NumSrcElts](int &M) {
2436 if (M >= (int)NumSrcElts)
2437 M -= NumSrcElts;
2438 };
2439
2440 SmallVector<int> NewMask0(OldMask);
2442 if (X == Z) {
2446 }
2447
2448 SmallVector<int> NewMask1(OldMask);
2450 if (Y == W) {
2454 }
2455
2456
2461 BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},
2462 &I);
2463
2464
2465
2466
2467
2471 ArrayRef<int> InnerMask;
2473 m_Mask(InnerMask)))) &&
2474 InnerOp->getType() == Op->getType() &&
2476 [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
2477 for (int &M : Mask)
2478 if (Offset <= M && M < (int)(Offset + NumSrcElts)) {
2481 }
2483 Op = InnerOp;
2484 return true;
2485 }
2486 return false;
2487 };
2488 bool ReducedInstCount = false;
2489 ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
2490 ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
2491 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
2492 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
2493
2494 auto *ShuffleCmpTy =
2500 nullptr, {Y, W});
2501
2503 NewCost +=
2505 } else {
2507 ShuffleDstTy, PredLHS, CostKind);
2508 }
2509
2510 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
2511 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2512 << "\n");
2513
2514
2515
2518 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2519 return false;
2520
2526 : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
2527
2528
2530 NewInst->copyIRFlags(LHS);
2531 NewInst->andIRFlags(RHS);
2532 }
2533
2536 replaceValue(I, *NewBO);
2537 return true;
2538}
2539
2540
2541
2542
2543bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
2544 ArrayRef<int> Mask;
2545 Value *C1, *T1, *F1, *C2, *T2, *F2;
2549 m_Mask(Mask))))
2550 return false;
2551
2554 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2555 return false;
2556
2559
2560 if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
2561 ((SI0FOp != nullptr) &&
2562 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2563 return false;
2564
2568 auto SelOp = Instruction::Select;
2573 OldCost +=
2575 {I.getOperand(0), I.getOperand(1)}, &I);
2576
2579 Mask, CostKind, 0, nullptr, {C1, C2});
2581 nullptr, {T1, T2});
2583 nullptr, {F1, F2});
2585 toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
2588
2589 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
2590 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2591 << "\n");
2592 if (NewCost > OldCost)
2593 return false;
2594
2599
2600 if (SI0FOp)
2601 NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2602 SI0FOp->getFastMathFlags());
2603 else
2604 NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2605
2607 Worklist.pushValue(ShuffleTrue);
2608 Worklist.pushValue(ShuffleFalse);
2609 replaceValue(I, *NewSel);
2610 return true;
2611}
2612
2613
2614
2615bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
2617 ArrayRef<int> OldMask;
2619 return false;
2620
2621
2623
2626 if (!C0 || (IsBinaryShuffle && !C1))
2627 return false;
2628
2630
2631
2632
2633 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2634 return false;
2635
2636 if (IsBinaryShuffle) {
2637 if (C0->getSrcTy() != C1->getSrcTy())
2638 return false;
2639
2640 if (Opcode != C1->getOpcode()) {
2642 Opcode = Instruction::SExt;
2643 else
2644 return false;
2645 }
2646 }
2647
2651 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2652 return false;
2653
2654 unsigned NumSrcElts = CastSrcTy->getNumElements();
2655 unsigned NumDstElts = CastDstTy->getNumElements();
2656 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2657 "Only bitcasts expected to alter src/dst element counts");
2658
2659
2660
2661 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2662 (NumDstElts % NumSrcElts) != 0)
2663 return false;
2664
2665 SmallVector<int, 16> NewMask;
2666 if (NumSrcElts >= NumDstElts) {
2667
2668
2669 assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
2670 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2672 } else {
2673
2674
2675 assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
2676 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2678 return false;
2679 }
2680
2681 auto *NewShuffleDstTy =
2683
2684
2688
2690 if (IsBinaryShuffle)
2692 else
2694
2696 OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,
2698
2700 CastSrcTy, NewMask, CostKind);
2704 NewCost += CostC0;
2705 if (IsBinaryShuffle) {
2709 OldCost += CostC1;
2711 NewCost += CostC1;
2712 }
2713
2714 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
2715 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2716 << "\n");
2717 if (NewCost > OldCost)
2718 return false;
2719
2721 if (IsBinaryShuffle)
2723 NewMask);
2724 else
2726
2727 Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);
2728
2729
2731 NewInst->copyIRFlags(C0);
2732 if (IsBinaryShuffle)
2733 NewInst->andIRFlags(C1);
2734 }
2735
2737 replaceValue(I, *Cast);
2738 return true;
2739}
2740
2741
2742
2743
2744
2745
2746
2747bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
2748 ArrayRef<int> OuterMask;
2749 Value *OuterV0, *OuterV1;
2752 return false;
2753
2754 ArrayRef<int> InnerMask0, InnerMask1;
2755 Value *X0, *X1, *Y0, *Y1;
2756 bool Match0 =
2758 bool Match1 =
2760 if (!Match0 && !Match1)
2761 return false;
2762
2763
2764
2765 SmallVector<int, 16> PoisonMask1;
2767 X1 = X0;
2768 Y1 = Y0;
2770 InnerMask1 = PoisonMask1;
2771 Match1 = true;
2772 }
2773
2774 X0 = Match0 ? X0 : OuterV0;
2775 Y0 = Match0 ? Y0 : OuterV0;
2776 X1 = Match1 ? X1 : OuterV1;
2777 Y1 = Match1 ? Y1 : OuterV1;
2781 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2783 return false;
2784
2785 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2786 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2787
2788
2789
2790
2791 SmallVector<int, 16> NewMask(OuterMask);
2792 Value *NewX = nullptr, *NewY = nullptr;
2793 for (int &M : NewMask) {
2794 Value *Src = nullptr;
2795 if (0 <= M && M < (int)NumImmElts) {
2796 Src = OuterV0;
2797 if (Match0) {
2799 Src = M >= (int)NumSrcElts ? Y0 : X0;
2800 M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
2801 }
2802 } else if (M >= (int)NumImmElts) {
2803 Src = OuterV1;
2804 M -= NumImmElts;
2805 if (Match1) {
2807 Src = M >= (int)NumSrcElts ? Y1 : X1;
2808 M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
2809 }
2810 }
2812 assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
2814
2815
2817 return false;
2819 continue;
2820 }
2821 if (!NewX || NewX == Src) {
2822 NewX = Src;
2823 continue;
2824 }
2825 if (!NewY || NewY == Src) {
2826 M += NumSrcElts;
2827 NewY = Src;
2828 continue;
2829 }
2830 return false;
2831 }
2832 }
2833
2834 if (!NewX)
2836 if (!NewY)
2838
2839
2841 replaceValue(I, *NewX);
2842 return true;
2843 }
2844
2845
2847 if (Match0)
2849
2851 if (Match1)
2853
2855
2856 InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
2857
2858 bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
2864 nullptr, {NewX, NewY});
2866 NewCost += InnerCost0;
2868 NewCost += InnerCost1;
2869
2870 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two shuffles: " << I
2871 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2872 << "\n");
2873 if (NewCost > OldCost)
2874 return false;
2875
2877 replaceValue(I, *Shuf);
2878 return true;
2879}
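// Illustrative standalone sketch (editorial) of the mask merging above:
// composing an outer shuffle mask with an inner one yields a single mask
// over the original source, so one shuffle can replace two.
#include <cstdio>
#include <vector>

static std::vector<int> compose(const std::vector<int> &Outer,
                                const std::vector<int> &Inner) {
  std::vector<int> Out;
  Out.reserve(Outer.size());
  for (int M : Outer)
    Out.push_back(M < 0 ? -1 : Inner[M]); // poison lanes stay poison
  return Out;
}

int main() {
  // Inner {3,2,1,0} then outer {0,2} selects source lanes {3,1}.
  for (int M : compose({0, 2}, {3, 2, 1, 0}))
    std::printf("%d ", M);
}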
2880
2881
2882
2883bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
2885 ArrayRef<int> OldMask;
2887 m_Mask(OldMask))))
2888 return false;
2889
2892 if (!II0 || !II1)
2893 return false;
2894
2896 if (IID != II1->getIntrinsicID())
2897 return false;
2898
2901 if (!ShuffleDstTy || !II0Ty)
2902 return false;
2903
2905 return false;
2906
2907 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
2909 II0->getArgOperand(I) != II1->getArgOperand(I))
2910 return false;
2911
2916 II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
2917
2920 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
2922 NewArgsTy.push_back(II0->getArgOperand(I)->getType());
2923 } else {
2926 ShuffleDstTy->getNumElements());
2930 CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});
2931 }
2932 }
2933 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
2935
2936 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
2937 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
2938 << "\n");
2939
2940 if (NewCost > OldCost)
2941 return false;
2942
2944 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
2946 NewArgs.push_back(II0->getArgOperand(I));
2947 } else {
2949 II1->getArgOperand(I), OldMask);
2952 }
2954
2955
2957 NewInst->copyIRFlags(II0);
2958 NewInst->andIRFlags(II1);
2959 }
2960
2961 replaceValue(I, *NewIntrinsic);
2962 return true;
2963}
2964
2965
2966
2967bool VectorCombine::foldPermuteOfIntrinsic(Instruction &I) {
2969 ArrayRef<int> Mask;
2971 return false;
2972
2974 if (!II0)
2975 return false;
2976
2979 if (!ShuffleDstTy || !IntrinsicSrcTy)
2980 return false;
2981
2982
2983 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
2984 if (any_of(Mask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
2985 return false;
2986
2989 return false;
2990
2991
2995 IntrinsicSrcTy, Mask, CostKind, 0, nullptr, {V0}, &I);
2996
2999 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
3001 NewArgsTy.push_back(II0->getArgOperand(I)->getType());
3002 } else {
3005 ShuffleDstTy->getNumElements());
3008 ArgTy, VecTy, Mask, CostKind, 0, nullptr,
3009 {II0->getArgOperand(I)});
3010 }
3011 }
3012 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3014
3015 LLVM_DEBUG(dbgs() << "Found a permute of intrinsic: " << I << "\n OldCost: "
3016 << OldCost << " vs NewCost: " << NewCost << "\n");
3017
3018 if (NewCost > OldCost)
3019 return false;
3020
3021
3023 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
3025 NewArgs.push_back(II0->getArgOperand(I));
3026 } else {
3030 }
3031 }
3032
3034
3037
3038 replaceValue(I, *NewIntrinsic);
3039 return true;
3040}
3041
3043
3046 unsigned NumElts =
3047 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
3048 int M = SV->getMaskValue(Lane);
3049 if (M < 0)
3050 return {nullptr, PoisonMaskElem};
3051 if (static_cast<unsigned>(M) < NumElts) {
3052 U = &SV->getOperandUse(0);
3053 Lane = M;
3054 } else {
3055 U = &SV->getOperandUse(1);
3056 Lane = M - NumElts;
3057 }
3058 }
3059 return {U, Lane};
3060}
3061
3066 auto [U, Lane] = IL;
3069 Lane)
3072 }
3073 return NItem;
3074}
3075
3076
3080 unsigned NumElts = Ty->getNumElements();
3081 if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
3082 return false;
3083
3084
3085
3087 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
3090 Ty, ConcatMask, CostKind) != 0)
3091 return false;
3092
3093 unsigned NumSlices = Item.size() / NumElts;
3094
3095
3097 return false;
3098 for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3099 Use *SliceV = Item[Slice * NumElts].first;
3100 if (!SliceV || SliceV->get()->getType() != Ty)
3101 return false;
3102 for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
3103 auto [V, Lane] = Item[Slice * NumElts + Elt];
3104 if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
3105 return false;
3106 }
3107 }
3108 return true;
3109}
3110
3117 auto [FrontU, FrontLane] = Item.front();
3118
3119 if (IdentityLeafs.contains(FrontU)) {
3120 return FrontU->get();
3121 }
3122 if (SplatLeafs.contains(FrontU)) {
3124 return Builder.CreateShuffleVector(FrontU->get(), Mask);
3125 }
3126 if (ConcatLeafs.contains(FrontU)) {
3127 unsigned NumElts =
3130 for (unsigned S = 0; S < Values.size(); ++S)
3131 Values[S] = Item[S * NumElts].first->get();
3132
3133 while (Values.size() > 1) {
3134 NumElts *= 2;
3136 std::iota(Mask.begin(), Mask.end(), 0);
3138 for (unsigned S = 0; S < NewValues.size(); ++S)
3139 NewValues[S] =
3140 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3141 Values = NewValues;
3142 }
3143 return Values[0];
3144 }
3145
3148 unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
3150 for (unsigned Idx = 0; Idx < NumOps; Idx++) {
3151 if (II &&
3153 Ops[Idx] = II->getOperand(Idx);
3154 continue;
3155 }
3157 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
3158 Builder, TTI);
3159 }
3160
3162 for (const auto &Lane : Item)
3163 if (Lane.first)
3164 ValueList.push_back(Lane.first->get());
3165
3166 Type *DstTy =
3173 }
3175 auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
3178 }
3180 auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
3183 }
3185 auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
3188 }
3189 if (II) {
3190 auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
3193 }
3199}
3200
3201
3202
3203
3204bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
3206 if (!Ty || I.use_empty())
3207 return false;
3208
3210 for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
3212
3215 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
3216 unsigned NumVisited = 0;
3217
3218 while (!Worklist.empty()) {
3220 return false;
3221
3223 auto [FrontU, FrontLane] = Item.front();
3224
3225
3226 if (!FrontU)
3227 return false;
3228
3229
3231 return X->getType() == Y->getType() &&
3233 };
3234
3235
3236 if (FrontLane == 0 &&
3238 Ty->getNumElements() &&
3240 Value *FrontV = Item.front().first->get();
3241 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
3242 E.value().second == (int)E.index());
3243 })) {
3244 IdentityLeafs.insert(FrontU);
3245 continue;
3246 }
3247
3249 C && C->getSplatValue() &&
3251 Value *FrontV = Item.front().first->get();
3256 })) {
3257 SplatLeafs.insert(FrontU);
3258 continue;
3259 }
3260
3262 auto [FrontU, FrontLane] = Item.front();
3263 auto [U, Lane] = IL;
3264 return !U || (U->get() == FrontU->get() && Lane == FrontLane);
3265 })) {
3266 SplatLeafs.insert(FrontU);
3267 continue;
3268 }
3269
3270
3271
3272 auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
3273 Value *FrontV = Item.front().first->get();
3274 if (!IL.first)
3275 return true;
3276 Value *V = IL.first->get();
3278 return false;
3279 if (V->getValueID() != FrontV->getValueID())
3280 return false;
3282 if (CI->getPredicate() != cast(FrontV)->getPredicate())
3283 return false;
3285 if (CI->getSrcTy()->getScalarType() !=
3286 cast(FrontV)->getSrcTy()->getScalarType())
3287 return false;
3290 SI->getOperand(0)->getType() !=
3292 return false;
3294 return false;
3297 II->getIntrinsicID() ==
3299 ->hasOperandBundles());
3300 };
3301 if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {
3302
3304
3306 BO && BO->isIntDivRem())
3307 return false;
3310 continue;
3311 } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3312 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
3314 continue;
3316
3319 if (DstTy && SrcTy &&
3320 SrcTy->getNumElements() == DstTy->getNumElements()) {
3322 continue;
3323 }
3328 continue;
3331 ->hasOperandBundles()) {
3332 for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
3334 &TTI)) {
3336 Value *FrontV = Item.front().first->get();
3340 }))
3341 return false;
3342 continue;
3343 }
3345 }
3346 continue;
3347 }
3348 }
3349
3351 ConcatLeafs.insert(FrontU);
3352 continue;
3353 }
3354
3355 return false;
3356 }
3357
3358 if (NumVisited <= 1)
3359 return false;
3360
3361 LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
3362
3363
3364
3367 ConcatLeafs, Builder, &TTI);
3368 replaceValue(I, *V);
3369 return true;
3370}
3371
3372
3373
3374
3375bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
3376 auto *II = dyn_cast<IntrinsicInst>(&I);
3377 if (!II)
3378 return false;
3379 switch (II->getIntrinsicID()) {
3380 case Intrinsic::vector_reduce_add:
3381 case Intrinsic::vector_reduce_mul:
3382 case Intrinsic::vector_reduce_and:
3383 case Intrinsic::vector_reduce_or:
3384 case Intrinsic::vector_reduce_xor:
3385 case Intrinsic::vector_reduce_smin:
3386 case Intrinsic::vector_reduce_smax:
3387 case Intrinsic::vector_reduce_umin:
3388 case Intrinsic::vector_reduce_umax:
3389 break;
3390 default:
3391 return false;
3392 }
3393
3394
3395
3396
3397 std::queue<Value *> Worklist;
3398 SmallPtrSet<Value *, 4> Visited;
3399 ShuffleVectorInst *Shuffle = nullptr;
3401 Worklist.push(Op);
3402
3403 while (!Worklist.empty()) {
3404 Value *CV = Worklist.front();
3405 Worklist.pop();
3406 if (!Visited.insert(CV).second)
3407 continue;
3408
3409
3411 continue;
3412
3414
3415 if (auto *CI = dyn_cast<Instruction>(CV)) {
3416 if (CI->isBinaryOp()) {
3417 for (auto *Op : CI->operand_values())
3418 Worklist.push(Op);
3419 continue;
3420 } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
3421 if (Shuffle && Shuffle != SV)
3422 return false;
3423 Shuffle = SV;
3424 continue;
3425 }
3426 }
3427
3428
3429 return false;
3430 }
3431
3432 if (!Shuffle)
3433 return false;
3434
3435
3436
3437
3438 for (auto *V : Visited)
3439 for (auto *U : V->users())
3440 if (!Visited.contains(U) && U != &I)
3441 return false;
3442
3443 FixedVectorType *VecType =
3445 if (!VecType)
3446 return false;
3447 FixedVectorType *ShuffleInputType =
3449 if (!ShuffleInputType)
3450 return false;
3451 unsigned NumInputElts = ShuffleInputType->getNumElements();
3452
3453
3454
3455 SmallVector<int> ConcatMask;
3457 sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
3458 bool UsesSecondVec =
3459 any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
3460
3466 ShuffleInputType, ConcatMask, CostKind);
3467
3468 LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
3469 << "\n");
3470 LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
3471 << "\n");
3472 bool MadeChanges = false;
3473 if (NewCost < OldCost) {
3477 LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
3478 replaceValue(*Shuffle, *NewShuffle);
3479 return true;
3480 }
3481
3482
3483
3484 MadeChanges |= foldSelectShuffle(*Shuffle, true);
3485 return MadeChanges;
3486}
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
3531
3532 std::queue<Value *> InstWorklist;
3534
3535
3536 std::optional<Intrinsic::ID> CommonCallOp = std::nullopt;
3537 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3538
3539 bool IsFirstCallOrBinInst = true;
3540 bool ShouldBeCallOrBinInst = true;
3541
3542
3543
3544
3545
3546 SmallVector<Value *, 2> PrevVecV(2, nullptr);
3547
3550 return false;
3551
3553 if (!FVT)
3554 return false;
3555
3556 int64_t VecSize = FVT->getNumElements();
3557 if (VecSize < 2)
3558 return false;
3559
3560
3561
3562 unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
3563 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3564

  // For non-power-of-two vector sizes, a level with an odd number of live
  // lanes carries its last lane over unchanged, which shifts the expected
  // shuffle indices at that level by one. Record one bit per level, set when
  // the lane count at that level is odd; the bits are consumed LSB-first
  // while walking the chain bottom-up.
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    if (Cur & 1)
      ExpectedParityMask |= (1ll << Mask);
  }
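
  // Worked example (illustrative): VecSize = 6 gives NumLevels = 3 and the
  // iteration visits Cur = 6 (even, bit 2 clear), Cur = 3 (odd, bit 1 set),
  // Cur = 2 (even, bit 0 clear), so ExpectedParityMask = 0b010: only the
  // middle level of the 6 -> 3 -> 2 -> 1 reduction has an odd lane count.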
3578
3579 InstWorklist.push(VecOpEE);
3580
3581 while (!InstWorklist.empty()) {
3582 Value *CI = InstWorklist.front();
3583 InstWorklist.pop();
3584
3586 if (!ShouldBeCallOrBinInst)
3587 return false;
3588
3589 if (!IsFirstCallOrBinInst &&
3590 any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
3591 return false;
3592
3593
3594
3595 if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3596 return false;
3597 IsFirstCallOrBinInst = false;
3598
3599 if (!CommonCallOp)
3600 CommonCallOp = II->getIntrinsicID();
3601 if (II->getIntrinsicID() != *CommonCallOp)
3602 return false;
3603
3604 switch (II->getIntrinsicID()) {
3605 case Intrinsic::umin:
3606 case Intrinsic::umax:
3607 case Intrinsic::smin:
3608 case Intrinsic::smax: {
3609 auto *Op0 = II->getOperand(0);
3610 auto *Op1 = II->getOperand(1);
3611 PrevVecV[0] = Op0;
3612 PrevVecV[1] = Op1;
3613 break;
3614 }
3615 default:
3616 return false;
3617 }
3618 ShouldBeCallOrBinInst ^= 1;
3619
3620 IntrinsicCostAttributes ICA(
3621 *CommonCallOp, II->getType(),
3622 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
3624
3625
3626
3628 std::swap(PrevVecV[0], PrevVecV[1]);
3629 InstWorklist.push(PrevVecV[1]);
3630 InstWorklist.push(PrevVecV[0]);
3632
3633
3634 if (!ShouldBeCallOrBinInst)
3635 return false;
3636
3637 if (!IsFirstCallOrBinInst &&
3638 any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
3639 return false;
3640
3641 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3642 return false;
3643 IsFirstCallOrBinInst = false;
3644
3645 if (!CommonBinOp)
3646 CommonBinOp = BinOp->getOpcode();
3647
3648 if (BinOp->getOpcode() != *CommonBinOp)
3649 return false;
3650
3651 switch (*CommonBinOp) {
3652 case BinaryOperator::Add:
3653 case BinaryOperator::Mul:
3654 case BinaryOperator::Or:
3655 case BinaryOperator::And:
3656 case BinaryOperator::Xor: {
3659 PrevVecV[0] = Op0;
3660 PrevVecV[1] = Op1;
3661 break;
3662 }
3663 default:
3664 return false;
3665 }
3666 ShouldBeCallOrBinInst ^= 1;
3667
3668 OrigCost +=
3670
3672 std::swap(PrevVecV[0], PrevVecV[1]);
3673 InstWorklist.push(PrevVecV[1]);
3674 InstWorklist.push(PrevVecV[0]);
    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
      // A shuffle must alternate with a call/binop, and both operands of the
      // preceding op must be known by now.
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;

      if (SVInst != PrevVecV[1])
        return false;

      ArrayRef<int> CurMask;
      if (!match(SVInst, m_Shuffle(m_Specific(PrevVecV[0]), m_Poison(),
                                   m_Mask(CurMask))))
        return false;

      // The live half of the mask must move the upper lanes down; the rest
      // must be poison.
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
          return false;
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
          return false;
      }

      // Move up one level: double the live half, adjusted by the parity bit
      // for an odd lane count at this level, and consume that bit.
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
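
      // E.g. (illustrative) for VecSize = 8, where no parity bits are set,
      // the masks seen bottom-up are <1, -1, ...>, then <2, 3, -1, ...>,
      // then <4, 5, 6, 7, -1, ...>: the 8 -> 4 -> 2 -> 1 halving tree read
      // in reverse.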

      OrigCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                     SVInst->getType(), SVInst->getType(),
                                     CurMask, CostKind);

      VisitedCnt += 1;
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
        break;

      ShouldBeCallOrBinInst ^= 1;
    } else {
      return false;
    }
  }

  // The chain should have terminated right after a completed shuffle level.
  if (ShouldBeCallOrBinInst)
    return false;

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  if (!FinalVecV)
    return false;

  auto *FinalVecVTy = cast<FixedVectorType>(FinalVecV->getType());

  Intrinsic::ID ReducedOp =
      CommonCallOp ? getMinMaxReductionIntrinsicID(*CommonCallOp)
                   : getReductionForBinop(*CommonBinOp);
  if (!ReducedOp)
    return false;

  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  InstructionCost NewCost = TTI.getIntrinsicInstrCost(ICA, CostKind);

  if (NewCost >= OrigCost)
    return false;

  auto *ReducedResult =
      Builder.CreateIntrinsic(ReducedOp, {FinalVecV->getType()}, {FinalVecV});
  replaceValue(I, *ReducedResult);

  return true;
}

/// Determine if it is more efficient to fold:
///   reduce(trunc(x)) -> trunc(reduce(x))
///   reduce(sext(x))  -> sext(reduce(x))
///   reduce(zext(x))  -> zext(reduce(x))
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;

  bool TruncOnly = false;
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    TruncOnly = true;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }
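  // Bitwise reductions commute with sext/zext as well as trunc, since an
  // extended lane's extra bits are reproduced by extending the reduced
  // scalar. Add/mul only commute with trunc: e.g. (illustrative)
  // reduce.add(zext <4 x i8> %x to <4 x i32>) differs from
  // zext(reduce.add(%x)) whenever the narrow sum overflows 8 bits.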

  unsigned ReductionOpc = getArithmeticReductionInstruction(IID);
  Value *ReductionSrc = I.getOperand(0);

  Value *Src;
  if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(Src)))) &&
      (TruncOnly || !match(ReductionSrc, m_OneUse(m_ZExtOrSExt(m_Value(Src))))))
    return false;

  auto CastOpc =
      (Instruction::CastOps)cast<Instruction>(ReductionSrc)->getOpcode();

  auto *SrcTy = cast<VectorType>(Src->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();

  InstructionCost OldCost = TTI.getArithmeticReductionCost(
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  OldCost += TTI.getCastInstrCost(
      CastOpc, ReductionSrcTy, SrcTy,
      TargetTransformInfo::CastContextHint::None, CostKind,
      cast<CastInst>(ReductionSrc));
  InstructionCost NewCost =
      TTI.getArithmeticReductionCost(ReductionOpc, SrcTy, std::nullopt,
                                     CostKind) +
      TTI.getCastInstrCost(CastOpc, ResultTy, ReductionSrcTy->getScalarType(),
                           TargetTransformInfo::CastContextHint::None,
                           CostKind);

  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(SrcTy->getScalarType(),
                                                II->getIntrinsicID(), {Src});
  Value *NewCast = Builder.CreateCast(CastOpc, NewReduction, ResultTy);
  replaceValue(I, *NewCast);
  return true;
}

/// Returns true if this ShuffleVectorInst eventually feeds into a vector
/// reduction intrinsic (e.g. @llvm.vector.reduce.add) by only following
/// chains of shuffles and binary operators (in any combination/order).
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
  constexpr unsigned MaxVisited = 32;
  SmallPtrSet<Instruction *, 8> Visited;
  SmallVector<Instruction *, 4> WorkList;
  bool FoundReduction = false;

  WorkList.push_back(SVI);
  while (!WorkList.empty()) {
    Instruction *I = WorkList.pop_back_val();
    for (User *U : I->users()) {
      auto *UI = dyn_cast<Instruction>(U);
      if (!UI || !Visited.insert(UI).second)
        continue;
      if (Visited.size() > MaxVisited)
        return false;

      if (auto *II = dyn_cast<IntrinsicInst>(UI)) {
        // A second reduction means the shuffle does not feed a single
        // reduction tree.
        if (FoundReduction)
          return false;
        switch (II->getIntrinsicID()) {
        case Intrinsic::vector_reduce_add:
        case Intrinsic::vector_reduce_mul:
        case Intrinsic::vector_reduce_and:
        case Intrinsic::vector_reduce_or:
        case Intrinsic::vector_reduce_xor:
        case Intrinsic::vector_reduce_smin:
        case Intrinsic::vector_reduce_smax:
        case Intrinsic::vector_reduce_umin:
        case Intrinsic::vector_reduce_umax:
          FoundReduction = true;
          continue;
        default:
          return false;
        }
      }

      if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
        return false;

      WorkList.emplace_back(UI);
    }
  }
  return FoundReduction;
}

/// This method looks for groups of shuffles acting on binops, of the form:
///  %x = shuffle ...
///  %y = shuffle ...
///  %a = binop %x, %y
///  %b = binop %x, %y
///  shuffle %a, %b, selectmask
/// We may, especially if the shuffle is wider than legal, be able to convert
/// the shuffle to a form where only parts of a and b need to be computed. On
/// architectures with no obvious "select" shuffle, this can reduce the total
/// number of operations if the target reports them as cheaper.
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group of shuffles (possibly with
  // extends) that we can transform together.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;

  // From a reduction, we need to be processing a single shuffle, otherwise
  // the other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // For binary shuffles, also collect any single-source shuffles of the
  // shuffle outputs, so they can be transformed as part of the same group.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        auto *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }

  // Our cost model is based on the number of elements that need to be kept
  // track of from each input. V1 and V2 record, in first-use order, the
  // element indices required from Op0 and Op1; each reconstruction mask
  // rebuilds one original shuffle output from those packed values.
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  SmallVector<std::pair<int, int>> V1, V2;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // If we have a shuffle of a shuffle, adjust the mask to be in terms of
    // the operands of the inner shuffle.
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (int &Elem : Mask) {
        if (Elem >= static_cast<int>(SSV->getShuffleMask().size()))
          return false;
        Elem = Elem < 0 ? Elem : SSV->getMaskValue(Elem);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the packed values from Op0/Op1 and reconstruct the original
    // lane order.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - static_cast<int>(NumElts),
                          NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }
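
  // E.g. (illustrative): a single shuffle with mask <6, 2, 7, 3> over two
  // 4-element binop results only demands elements {2, 3} of each input, so
  // V1 = {2, 3}, V2 = {2, 3} and the reconstruction mask over the packed
  // values becomes <4, 0, 5, 1>.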

  // If the maximum elements used from V1 and V2 are not larger than the new
  // vectors, the vectors are already packed and performing the optimization
  // again will likely not help any further. This also prevents us from
  // getting stuck in a cycle in case the costs do not also rule it out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;

  // Given a shuffle of the inputs, look through any input shuffles to get
  // the mask value in terms of the original (pre-shuffle) operands.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Attempt to sort the inputs by ascending mask values, to help share
  // similar shuffles between the reconstruction masks and keep the new input
  // shuffles cheap.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // Calculate the masks needed for the new input shuffles.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TargetTransformInfo::SK_PermuteSingleSrc
                                      : TargetTransformInfo::SK_PermuteTwoSrc,
                                  VT, VT, SV->getShuffleMask(), CostKind);
  };
  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    return C + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VT,
                                  VT, Mask, CostKind);
  };

  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
          .getFixedValue();
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
    return false;

  // When there are multiple shufflevector operations on the same input,
  // especially when the vector length is larger than the register size,
  // identical shuffle patterns may occur across different groups of elements.
  // To avoid overestimating the cost by counting these repeated shuffles more
  // than once, only unique shuffle patterns are accounted for in full. This
  // adjustment prevents inflated costs in the cost model for wide vectors
  // split into several register-sized groups.
  std::set<SmallVector<int, 4>> UniqueShuffles;
  auto AddShuffleMaskAdjustedCost = [&](InstructionCost C,
                                        ArrayRef<int> Mask) {
    // Compute the cost for performing the shuffle over the full vector.
    auto ShuffleCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                          VT, VT, Mask, CostKind);
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    // For each group of MaxElementsInVector contiguous elements, collect the
    // group's shuffle pattern and count it only once across all masks.
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    }
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
  };
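
  // E.g. (illustrative): a 16-element mask on a target with 4-element
  // registers splits into 4 groups; if only 2 distinct group patterns occur,
  // the mask is charged 2/4 of the full shuffle cost.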
  auto AddShuffleAdjustedCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  };
  // Check that the input consists of ShuffleVectors applied to the same
  // input.
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
          return false;
        ShuffleVectorInst *FirstSV =
            dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
        if (!FirstSV)
          return false;

        Value *In0 = FirstSV->getOperand(0), *In1 = FirstSV->getOperand(1);
        return std::all_of(
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
            });
      };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT, CostKind) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT, CostKind);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleAdjustedCost);
  } else {
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleCost);
  }

  // The new binops will be narrower, operating only on the packed lanes.
  FixedVectorType *Op0SmallVT =
      FixedVectorType::get(VT->getScalarType(), V1.size());
  FixedVectorType *Op1SmallVT =
      FixedVectorType::get(VT->getScalarType(), V2.size());
  InstructionCost CostAfter =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT, CostKind) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT, CostKind);
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskAdjustedCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
      (CostBefore == CostAfter && !feedsIntoVectorReduction(SVI)))
    return false;

  // The cost model has passed; create the new instructions, looking through
  // any input shuffles to their original operands.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Builder.SetInsertPoint(*SVI0A->getInsertionPointAfterDef());
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Builder.SetInsertPoint(*SVI0B->getInsertionPointAfterDef());
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Builder.SetInsertPoint(*SVI1A->getInsertionPointAfterDef());
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Builder.SetInsertPoint(*SVI1B->getInsertionPointAfterDef());
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Builder.SetInsertPoint(Op0);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Builder.SetInsertPoint(Op1);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV, false);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  return true;
}

/// Check if an instruction depends on a zext, and if the zext can be moved
/// after the instruction so the operation is performed at a narrower width.
/// For example:
///   %diff = sub (zext %a), (zext %b)
///   %res  = lshr %diff, 16
/// can become:
///   %diff = sub %a, %b
///   %res  = zext (lshr %diff, 16)
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
                                  m_Value(OtherOperand))) &&
      !match(&I, m_LShr(m_ZExt(m_Value(ZExted)), m_Value(OtherOperand))))
    return false;

  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);

  auto *BigTy = cast<FixedVectorType>(I.getType());
  auto *SmallTy = cast<FixedVectorType>(ZExted->getType());
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();

  if (I.getOpcode() == Instruction::LShr) {
    // Check that the shift amount is less than the number of bits in the
    // smaller type. Otherwise, the smaller lshr may return a poison value.
    KnownBits ShAmtKB = computeKnownBits(I.getOperand(1), *DL);
    if (ShAmtKB.getMaxValue().uge(BW))
      return false;
  } else {
    // Check that the expression overall uses at most the same number of bits
    // as ZExted.
    KnownBits KB = computeKnownBits(&I, *DL);
    if (KB.countMaxActiveBits() > BW)
      return false;
  }

  // Calculate costs of leaving current IR as it is and moving the zext
  // operation later, along with adding truncates if needed.
  InstructionCost ZExtCost = TTI.getCastInstrCost(
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  InstructionCost CurrentCost = ZExtCost;
  InstructionCost ShrinkCost = 0;

  // Analyse all users of the zext operand to check that the transform is
  // valid for each of them, summing up the costs as we go.
  for (User *U : ZExtOperand->users()) {
    auto *UI = cast<Instruction>(U);
    if (UI == &I) {
      CurrentCost +=
          TTI.getArithmeticInstrCost(UI->getOpcode(), BigTy, CostKind);
      ShrinkCost +=
          TTI.getArithmeticInstrCost(UI->getOpcode(), SmallTy, CostKind);
      ShrinkCost += ZExtCost;
      continue;
    }

    if (!Instruction::isBinaryOp(UI->getOpcode()))
      return false;

    // Check if we can propagate the zext through this user too.
    KnownBits KB = computeKnownBits(UI, *DL);
    if (KB.countMaxActiveBits() > BW)
      return false;

    CurrentCost += TTI.getArithmeticInstrCost(UI->getOpcode(), BigTy, CostKind);
    ShrinkCost +=
        TTI.getArithmeticInstrCost(UI->getOpcode(), SmallTy, CostKind);
    ShrinkCost += ZExtCost;
  }

  // If the other operand is not a constant, we will need to generate a
  // truncate instruction, so adjust the cost accordingly.
  if (!isa<Constant>(OtherOperand))
    ShrinkCost += TTI.getCastInstrCost(
        Instruction::Trunc, SmallTy, BigTy,
        TargetTransformInfo::CastContextHint::None, CostKind);

  // If the costs are equal, still prefer the narrower form, since shrinking
  // opens opportunities for further shrinking optimisations.
  if (ShrinkCost > CurrentCost)
    return false;

  Builder.SetInsertPoint(&I);
  Value *Op0 = ZExted;
  Value *Op1 = Builder.CreateTrunc(OtherOperand, SmallTy);
  // Keep the order of operands the same.
  if (I.getOperand(0) == OtherOperand)
    std::swap(Op0, Op1);
  Value *NewBinOp =
      Builder.CreateBinOp((Instruction::BinaryOps)I.getOpcode(), Op0, Op1);
  cast<Instruction>(NewBinOp)->copyIRFlags(&I);
  cast<Instruction>(NewBinOp)->setDebugLoc(I.getDebugLoc());
  Value *NewZExtr = Builder.CreateZExt(NewBinOp, BigTy);
  replaceValue(I, *NewZExtr);
  return true;
}

/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
/// shuffle (DstVec, SrcVec, Mask)
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!match(&I,
             m_InsertElt(m_Value(DstVec),
                         m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx)),
                         m_ConstantInt(InsIdx))))
    return false;

  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
  // We can try combining vectors with different element counts, but not with
  // different element types.
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
    return false;

  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
    return false;

  // Insertion into a poison vector is a cheaper single-source shuffle.
  TargetTransformInfo::ShuffleKind SK;
  SmallVector<int> Mask(NumDstElts, PoisonMaskElem);

  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
  if (NeedDstSrcSwap) {
    SK = TargetTransformInfo::SK_PermuteSingleSrc;
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = 0;
    else
      Mask[InsIdx] = ExtIdx;
    std::swap(DstVec, SrcVec);
  } else {
    SK = TargetTransformInfo::SK_PermuteTwoSrc;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
    else
      Mask[InsIdx] = ExtIdx + NumDstElts;
  }
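
  // E.g. (illustrative) with NumDstElts = NumSrcElts = 4, InsIdx = 1 and
  // ExtIdx = 2, the two-source case produces Mask = <0, 6, 2, 3>: lane 1 is
  // taken from SrcVec element 2 (offset by NumDstElts), all other lanes come
  // from DstVec.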

  // Cost the original extract/insert pair against the replacement shuffles.
  auto *Ins = cast<InsertElementInst>(&I);
  auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
  InstructionCost InsCost =
      TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx);
  InstructionCost ExtCost =
      TTI.getVectorInstrCost(*Ext, SrcVecTy, CostKind, ExtIdx);
  InstructionCost OldCost = ExtCost + InsCost;
  InstructionCost NewCost = 0;

  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
    // The source and destination vectors are the same size: a single shuffle
    // performs both the extract and the insert.
    NewCost = TTI.getShuffleCost(SK, DstVecTy, SrcVecTy, Mask, CostKind, 0,
                                 nullptr, {DstVec, SrcVec});
  } else {
    // When the element counts differ, first bring the extracted element into
    // a vector of the destination type, then blend.
    ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
    else
      ExtToVecMask[0] = ExtIdx;
    // Add cost for expanding or narrowing.
    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                 DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
    NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind);
  }

  if (!Ext->hasOneUse())
    NewCost += ExtCost;

  LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (OldCost < NewCost)
    return false;

  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
      SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
    else
      DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
  }

  // Canonicalize an undef vector to the RHS to help further folds.
  if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
    ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts);
    std::swap(DstVec, SrcVec);
  }

  Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
  replaceValue(I, *Shuf);

  return true;
}

/// If we're interleaving two constant splats, for instance
/// <vscale x 8 x i32> <splat of 666> and <vscale x 8 x i32> <splat of 777>,
/// we can create a larger splat
/// <vscale x 8 x i64> <splat of ((777 << 32) | 666)> first,
/// before casting it back into <vscale x 16 x i32>.
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
                     m_APInt(SplatVal0), m_APInt(SplatVal1))))
    return false;

  LLVM_DEBUG(dbgs() << "VC: Folding interleave2 with two splats: " << I
                    << "\n");

  auto *VTy =
      cast<VectorType>(cast<IntrinsicInst>(I).getArgOperand(0)->getType());
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();

  // Skip the transform when the wide constant splat plus the bitcast back to
  // the original type would be at least as expensive as the interleave being
  // replaced.
  if (TTI.getInstructionCost(&I, CostKind) <=
      TTI.getCastInstrCost(Instruction::BitCast, I.getType(), ExtVTy,
                           TargetTransformInfo::CastContextHint::None,
                           CostKind)) {
    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");
    return false;
  }

  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
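  // Per the pattern above, the wide lane value is (SplatVal1 << Width) |
  // SplatVal0 at twice the element width: one wide lane per interleaved pair.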
  auto *NewSplat = ConstantVector::getSplat(
      ExtVTy->getElementCount(),
      ConstantInt::get(F.getContext(), NewSplatVal));

  replaceValue(I, *Builder.CreateBitCast(NewSplat, I.getType()));
  return true;
}

// Attempt to shrink loads that are only used by shufflevector instructions.
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  auto *OldLoad = dyn_cast<LoadInst>(&I);
  if (!OldLoad || !OldLoad->isSimple())
    return false;

  auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
  if (!OldLoadTy)
    return false;

  unsigned const OldNumElements = OldLoadTy->getNumElements();

  // Search all uses of the load. If all uses are shufflevector instructions
  // with poison second operands, find the minimum and maximum indices of the
  // load's vector elements referenced by all shuffle masks. Otherwise return
  // std::nullopt.
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      // Ensure all uses match the required pattern.
      User *Shuffle = Use.getUser();
      ArrayRef<int> Mask;

      if (!match(Shuffle,
                 m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask))))
        return std::nullopt;

      // Ignore shufflevector instructions that have no uses.
      if (Shuffle->use_empty())
        continue;

      // Find the min and max indices used by the shufflevector instruction.
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
        }
      }
    }

    if (OutputRange.second < OutputRange.first)
      return std::nullopt;

    return OutputRange;
  };
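
  // E.g. (illustrative): a load of <8 x i32> whose only users are shuffles
  // drawing from indices 0..3 yields the range [0, 3], so the load can be
  // shrunk to <4 x i32>.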

  // Get the range of vector elements used by shufflevector instructions.
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;

    // If the range of vector elements is smaller than the full load, attempt
    // to create a smaller load.
    if (NewNumElements < OldNumElements) {
      Builder.SetInsertPoint(OldLoad);
      Builder.SetCurrentDebugLocation(OldLoad->getDebugLoc());

      // Calculate costs of old and new ops.
      Type *ElemTy = OldLoadTy->getElementType();
      auto *NewLoadTy = FixedVectorType::get(ElemTy, NewNumElements);
      Value *PtrOp = OldLoad->getPointerOperand();

      InstructionCost OldCost = TTI.getMemoryOpCost(
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
      InstructionCost NewCost =
          TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(),
                              OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      SmallVector<UseEntry, 4u> NewUses;
      unsigned const MaxIndex = NewNumElements * 2u;

      for (llvm::Use &Use : I.uses()) {
        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();

        // Create an entry for the new use, keeping the original mask.
        NewUses.push_back({Shuffle, {OldMask.begin(), OldMask.end()}});

        // Validate the mask against the narrower vector: indices at or past
        // twice the new element count cannot be represented.
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
            return false;
        }

        // Update costs.
        OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                      Shuffle->getType(), OldLoadTy, OldMask,
                                      CostKind);
        NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                      Shuffle->getType(), NewLoadTy, OldMask,
                                      CostKind);
      }

      LLVM_DEBUG(
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n  OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");

      if (OldCost < NewCost || !NewCost.isValid())
        return false;

      // Create a new load of the smaller vector type.
      LoadInst *NewLoad = cast<LoadInst>(
          Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign()));
      NewLoad->copyMetadata(I);

      // Replace all uses.
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;

        Builder.SetInsertPoint(Shuffle);
        Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
        Value *NewShuffle = Builder.CreateShuffleVector(
            NewLoad, PoisonValue::get(NewLoadTy), NewMask);

        replaceValue(*Shuffle, *NewShuffle, false);
      }

      return true;
    }
  }
  return false;
}

/// Attempt to shrink a phi of two length-changing shuffles of the same
/// source, where the two shuffle masks differ only by a constant rotation,
/// by rotating the narrow input before the phi instead of permuting the wide
/// result after it.
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi || Phi->getNumIncomingValues() != 2u)
    return false;

  Value *Op = nullptr;
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;

  if (!match(Phi->getOperand(0u),
             m_OneUse(m_Shuffle(m_Value(Op), m_Poison(), m_Mask(Mask0)))) ||
      !match(Phi->getOperand(1u),
             m_OneUse(m_Shuffle(m_Specific(Op), m_Poison(), m_Mask(Mask1)))))
    return false;

  auto *Shuf = cast<ShuffleVectorInst>(Phi->getOperand(0u));

  // The transform only helps when the result vector is wider than the input:
  // we then rotate the narrow input rather than permuting the wide result.
  auto *InputVT = cast<FixedVectorType>(Op->getType());
  auto *ResultVT = cast<FixedVectorType>(Shuf->getType());
  auto const InputNumElements = InputVT->getNumElements();

  if (InputNumElements >= ResultVT->getNumElements())
    return false;

  // Take the difference of the two shuffle masks at each index, ignoring
  // lanes that are poison in both masks.
  SmallVector<int, 16> NewMask;
  NewMask.reserve(Mask0.size());

  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
      NewMask.push_back(M0 - M1);
    else if (M0 == -1 && M1 == -1)
      continue;
    else
      return false;
  }

  // All differences must agree: the two masks are then rotations of one
  // another by a common offset.
  if (NewMask.empty() || !all_equal(NewMask))
    return false;

  // Create a rotation mask over the narrow input from the common offset.
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  NewMask.clear();

  for (unsigned I = 0u; I < InputNumElements; ++I) {
    NewMask.push_back(Index);
    Index = (Index + 1u) % InputNumElements;
  }
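
  // E.g. (illustrative): InputNumElements = 4 with MaskOffset = 1 produces
  // the rotation mask <1, 2, 3, 0>, while MaskOffset = -1 wraps around to
  // <3, 0, 1, 2>.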

  // Compare the cost of the two original wide shuffles against the single
  // narrow rotation plus the reapplied wide mask.
  auto OldCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    ResultVT, InputVT, Mask0, CostKind) +
                 TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    ResultVT, InputVT, Mask1, CostKind);
  auto NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    InputVT, InputVT, NewMask, CostKind) +
                 TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    ResultVT, InputVT, Mask1, CostKind);

  LLVM_DEBUG(dbgs() << "Found a phi of mergeable shuffles: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (NewCost > OldCost)
    return false;

  // Rotate the narrow input in the first incoming block, feed the phi with
  // it and the raw input, then reapply the remaining mask after the phi.
  Builder.SetInsertPoint(Shuf);
  auto *NewShuf0 = Builder.CreateShuffleVector(Op, NewMask);

  Builder.SetInsertPoint(Phi);
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));

  Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
  auto *NewShuf1 = Builder.CreateShuffleVector(NewPhi, Mask1);

  replaceValue(*Phi, *NewShuf1);
  return true;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");

  auto FoldInst = [this](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsVectorType = isa<VectorType>(I.getType());
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    LLVM_DEBUG(dbgs() << "VC: Visiting: " << I << '\n');

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline. The type checking is for run-time
    // efficiency: we can avoid wasting time dispatching to folding functions
    // if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // These transforms work with scalable and fixed vectors.
    if (IsVectorType) {
      if (scalarizeOpOrCmp(I))
        return true;
      if (scalarizeLoad(I))
        return true;
      if (scalarizeExtExtract(I))
        return true;
      if (scalarizeVPIntrinsic(I))
        return true;
      if (foldInterleaveIntrinsics(I))
        return true;
    }

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return false;

    // Otherwise, try folds that improve codegen but may interfere with
    // early IR canonicalizations.
    // The type checking is for run-time efficiency. We can avoid wasting
    // time dispatching to folding functions if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldPermuteOfIntrinsic(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        if (shrinkType(I))
          return true;
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
          return true;
        if (foldCastFromReductions(I))
          return true;
        break;
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
          return true;
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
          return true;
        break;
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
          return true;
        [[fallthrough]];
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          if (foldExtractExtract(I))
            return true;
          if (foldExtractedCmps(I))
            return true;
          if (foldBinopOfReductions(I))
            return true;
        }
        break;
      }
    }
    return false;
  };

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;

    // Walk the block forwards, remembering the next instruction in NextInst
    // so that folds which erase or replace the current instruction do not
    // break iteration.
    Instruction *I = &BB.front();
    while (I) {
      NextInst = I->getNextNode();
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
      I = NextInst;
    }
  }

  NextInst = nullptr;

  // Process any instructions queued for revisiting until a fixpoint.
  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;

    if (isInstructionTriviallyDead(I)) {
      eraseInstruction(*I);
      continue;
    }

    MadeChange |= FoldInst(*I);
  }

  return MadeChange;
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  const DataLayout *DL = &F.getDataLayout();
  VectorCombine Combiner(F, TTI, DT, AA, AC, DL,
                         TargetTransformInfo::TCK_RecipThroughput,
                         TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
Definition VectorCombine.cpp:3111
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
Definition VectorCombine.cpp:3077
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
Definition VectorCombine.cpp:1548
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
Definition VectorCombine.cpp:3063
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
Definition VectorCombine.cpp:566
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
Definition VectorCombine.cpp:1785
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
Definition VectorCombine.cpp:3813
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
Definition VectorCombine.cpp:1734
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
Definition VectorCombine.cpp:3044
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
Definition VectorCombine.cpp:210
static const unsigned InvalidIndex
Definition VectorCombine.cpp:70
std::pair< Use *, int > InstLane
Definition VectorCombine.cpp:3042
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
Definition VectorCombine.cpp:581
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
Definition VectorCombine.cpp:1665
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
LLVM_ABI InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const
Estimate the overhead of scalarizing an instruction.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI TypeSize getRegisterBitWidth(RegisterKind K) const
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI bool allowVectorElementIndexingUsingGEP() const
Returns true if GEP should not be used to index into vectors for this target.
LLVM_ABI InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI unsigned getMinVectorRegisterBitWidth() const
LLVM_ABI InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const
LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ None
The cast is not used with a load/store of any kind.
@ OK_NonUniformConstantValue
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition VectorCombine.cpp:4907
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) that will be converted into a vector (I).
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
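A sketch, assuming X and Y are Value* and SQ a SimplifyQuery: try to fold the operation before materializing a new instruction:

  if (llvm::Value *Folded =
          llvm::simplifyBinOp(llvm::Instruction::Add, X, Y, SQ))
    return Folded; // e.g. "add X, 0" folds to X with no new IR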
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
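The inverse of the widening above, and it cannot fail: each wide index i becomes Scale consecutive narrow indices (illustrative):

  llvm::SmallVector<int, 8> Narrow;
  llvm::narrowShuffleMaskElts(2, {0, 1}, Narrow); // Narrow == {0, 1, 2, 3}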
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined value of C.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
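cast<> asserts when the type does not match, so it should follow an isa<> check or a known invariant; dyn_cast<> returns nullptr instead (illustrative):

  auto *Shuf = llvm::cast<llvm::ShuffleVectorInst>(V); // asserts if V is not a shuffle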
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
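In practice this computes the alignment of a base pointer plus a byte offset; a sketch with concrete numbers:

  llvm::Align A = llvm::commonAlignment(llvm::Align(16), 8);
  // A == Align(8): a 16-byte-aligned base plus offset 8 is 8-byte aligned.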
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
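For example (illustrative):

  bool Uniform = llvm::all_equal({4, 4, 4}); // true; an empty list is also true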
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
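A sketch, assuming Ctx is an LLVMContext&: build <4 x i32> from a scalar i32 type:

  llvm::Type *VecTy = llvm::toVectorTy(llvm::Type::getInt32Ty(Ctx),
                                       llvm::ElementCount::getFixed(4));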
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
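A small KnownBits sketch tying the last two entries together: with the top 24 bits of an i32 known zero, at most 8 bits are active and the maximal value is 255:

  llvm::KnownBits Known(32);
  Known.Zero.setHighBits(24);
  unsigned Active = Known.countMaxActiveBits(); // 8
  llvm::APInt Max = Known.getMaxValue();        // 255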