LLVM: lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
22#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include
25
26using namespace llvm;
28
29#define DEBUG_TYPE "AMDGPUtti"
30
31namespace {
32
33struct AMDGPUImageDMaskIntrinsic {
34 unsigned Intr;
35};
36
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
39
40}
41
42
43
44
45
49
53 return maxnum(Src1, Src2);
54
58 return maxnum(Src0, Src2);
59
60 return maxnum(Src0, Src1);
61}
62
63
64
65
66
68 Type *VTy = V.getType();
70
71 return false;
72 }
73 if (IsFloat) {
75
76
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo = true;
80 &LosesInfo);
81 return !LosesInfo;
82 }
83 } else {
85
86
87 APInt IntValue(ConstInt->getValue());
89 }
90 }
91
95 if (IsExt) {
98 return true;
99 }
100
101 return false;
102}
103
104
106 Type *VTy = V.getType();
110 return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
112 return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
113
115}
116
117
118
119
124 Func) {
127 return std::nullopt;
128
130
131
132 Func(Args, ArgTys);
133
139
140
143
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
145
147 if (RemoveOldIntr)
149
150 return RetValue;
151}
152
153static std::optional<Instruction *>
157
158 if (const auto *LZMappingInfo =
160 if (auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
165 ImageDimIntr->Dim);
167 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
169 });
170 }
171 }
172 }
173
174
175 if (const auto *MIPMappingInfo =
177 if (auto *ConstantMip =
179 if (ConstantMip->isZero()) {
182 ImageDimIntr->Dim);
184 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
186 });
187 }
188 }
189 }
190
191
192 if (const auto *BiasMappingInfo =
194 if (auto *ConstantBias =
196 if (ConstantBias->isZero()) {
199 ImageDimIntr->Dim);
201 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
204 });
205 }
206 }
207 }
208
209
210 if (const auto *OffsetMappingInfo =
212 if (auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
219 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
221 });
222 }
223 }
224 }
225
226
227 if (ST->hasD16Images()) {
228
231
232 if (BaseOpcode->HasD16) {
233
234
235
236
237 if (II.hasOneUse()) {
239
240 if (User->getOpcode() == Instruction::FPTrunc &&
242
244 [&](auto &Args, auto &ArgTys) {
245
246
247 ArgTys[0] = User->getType();
248 });
249 }
250 }
251
252
253
255 ExtractTruncPairs;
256 bool AllHalfExtracts = true;
257
258 for (User *U : II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts = false;
262 break;
263 }
264
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts = false;
268 break;
269 }
270
272 }
273
274 if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
276 Type *HalfVecTy =
278
279
280
283 SigTys[0] = HalfVecTy;
284
288
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
291
293 for (auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
295
296 Builder.SetInsertPoint(Tr);
297
298 Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
300
301 Tr->replaceAllUsesWith(HalfExtract);
302 }
303
304 for (auto &[Ext, Tr] : ExtractTruncPairs) {
307 }
308
309 return &II;
310 }
311 }
312 }
313
314
315 if (!ST->hasA16() && !ST->hasG16())
316 return std::nullopt;
317
318
319
320 bool HasSampler =
322 bool FloatCoord = false;
323
324 bool OnlyDerivatives = false;
325
326 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord = II.getOperand(OperandIndex);
329
331 if (OperandIndex < ImageDimIntr->CoordStart ||
333 return std::nullopt;
334 }
335
336 OnlyDerivatives = true;
337 break;
338 }
339
343 }
344
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives = true;
347
348
349 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives = true;
355 }
356
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
359 return std::nullopt;
360
363
365 II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
369
370
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
373 }
374
375 unsigned EndIndex =
378 OperandIndex < EndIndex; OperandIndex++) {
379 Args[OperandIndex] =
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
381 }
382
383
384 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
387 }
388 });
389}
390
394
395
396
397
398
401
402 return true;
403 }
404
407
408 return true;
409 }
410 return false;
411}
412
413
415 Value *Src = nullptr;
418 if (Src->getType()->isHalfTy())
419 return Src;
421 bool LosesInfo;
424 if (!LosesInfo)
426 }
427 return nullptr;
428}
429
430
431
435 unsigned VWidth = VTy->getNumElements();
437
438 for (int i = VWidth - 1; i > 0; --i) {
440 if (!Elt)
441 break;
442
445 break;
446 } else {
447 break;
448 }
449
451 }
452
453 return DemandedElts;
454}
455
456
457
460 unsigned VWidth = VTy->getNumElements();
463
466 SVI->getShuffleMask(ShuffleMask);
467
468 for (int I = VWidth - 1; I > 0; --I) {
469 if (ShuffleMask.empty()) {
471 if (!Elt || (Elt != FirstComponent && (Elt)))
472 break;
473 } else {
474
475
476 if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
477 break;
478 }
480 }
481
482 return DemandedElts;
483}
484
487 APInt DemandedElts,
488 int DMaskIdx = -1,
489 bool IsLoad = true);
490
491
497
498
500 Value *V = U.get();
502 return true;
507 return false;
508
509
511 }
512 return false;
513}
514
515
516
517
520 unsigned LaneArgIdx) const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
523
526 return true;
527
529 return false;
530
531
532
533
534
535 Value *LaneArg = II.getArgOperand(LaneArgIdx);
537 ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
540 return true;
541 }
542
543 return false;
544}
545
550
551 CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
553 return NewCall;
554}
555
559 const auto IID = II.getIntrinsicID();
560 assert(IID == Intrinsic::amdgcn_readlane ||
561 IID == Intrinsic::amdgcn_readfirstlane ||
562 IID == Intrinsic::amdgcn_permlane64);
563
565
566
567
568
570 return nullptr;
571
572 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
573
574
575
576 Value *LaneID = nullptr;
577 if (IsReadLane) {
578 LaneID = II.getOperand(1);
579
580
581
582
585 return nullptr;
586 }
587 }
588
589
590
591
592 const auto DoIt = [&](unsigned OpIdx,
595 if (IsReadLane)
596 Ops.push_back(LaneID);
597
598
600
601
604 return &NewOp;
605 };
606
607
608 if (IID == Intrinsic::amdgcn_permlane64 && (OpInst))
609 return nullptr;
610
612 return DoIt(0, II.getCalledFunction());
613
616 Type *SrcTy = Src->getType();
618 return nullptr;
619
622 return DoIt(0, Remangled);
623 }
624
625
627
628
630 return DoIt(1, II.getCalledFunction());
632 return DoIt(0, II.getCalledFunction());
633 }
634
635 return nullptr;
636}
637
638std::optional<Instruction *>
641 switch (IID) {
642 case Intrinsic::amdgcn_rcp: {
643 Value *Src = II.getArgOperand(0);
646
647
650 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
652 }
653
654 if (II.isStrictFP())
655 break;
656
658 const APFloat &ArgVal = C->getValueAPF();
661
662
663
664
665
667 }
668
671 break;
673 if (!SrcCI)
674 break;
675
676 auto IID = SrcCI->getIntrinsicID();
677
678
679
680
681 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
684 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
685 break;
686
688 break;
689
691 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
692
693 InnerFMF |= FMF;
694 II.setFastMathFlags(InnerFMF);
695
696 II.setCalledFunction(NewDecl);
698 }
699
700 break;
701 }
702 case Intrinsic::amdgcn_sqrt:
703 case Intrinsic::amdgcn_rsq:
704 case Intrinsic::amdgcn_tanh: {
705 Value *Src = II.getArgOperand(0);
708
709
712 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
714 }
715
716
717 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
719 II.getModule(), Intrinsic::sqrt, {II.getType()});
720 II.setCalledFunction(NewDecl);
721 return &II;
722 }
723
724 break;
725 }
726 case Intrinsic::amdgcn_log:
727 case Intrinsic::amdgcn_exp2: {
728 const bool IsLog = IID == Intrinsic::amdgcn_log;
729 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
730 Value *Src = II.getArgOperand(0);
732
735
738
740 if (C->isInfinity()) {
741
742
743 if (->isNegative())
745
746
747 if (IsExp && C->isNegative())
749 }
750
751 if (II.isStrictFP())
752 break;
753
754 if (C->isNaN()) {
755 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
757 }
758
759
760 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
762 : ConstantFP::get(Ty, 1.0);
764 }
765
766 if (IsLog && C->isNegative())
768
769
770 }
771
772 break;
773 }
774 case Intrinsic::amdgcn_frexp_mant:
775 case Intrinsic::amdgcn_frexp_exp: {
776 Value *Src = II.getArgOperand(0);
778 int Exp;
781
782 if (IID == Intrinsic::amdgcn_frexp_mant) {
784 II, ConstantFP::get(II.getContext(), Significand));
785 }
786
787
789 Exp = 0;
790
792 }
793
796
799 }
800
801 break;
802 }
803 case Intrinsic::amdgcn_class: {
804 Value *Src0 = II.getArgOperand(0);
805 Value *Src1 = II.getArgOperand(1);
807 if (CMask) {
809 II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
810
811
812 II.setArgOperand(1, ConstantInt::get(Src1->getType(),
814 return &II;
815 }
816
817
820
821
824
825
830 }
831 break;
832 }
833 case Intrinsic::amdgcn_cvt_pkrtz: {
834 auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
836
841
844 bool LosesInfo;
847 return ConstantFP::get(HalfTy, Val);
848 }
849
850 Value *Src = nullptr;
852 if (Src->getType()->isHalfTy())
853 return Src;
854 }
855
856 return nullptr;
857 };
858
859 if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
860 if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
865 }
866 }
867
868 break;
869 }
870 case Intrinsic::amdgcn_cvt_pknorm_i16:
871 case Intrinsic::amdgcn_cvt_pknorm_u16:
872 case Intrinsic::amdgcn_cvt_pk_i16:
873 case Intrinsic::amdgcn_cvt_pk_u16: {
874 Value *Src0 = II.getArgOperand(0);
875 Value *Src1 = II.getArgOperand(1);
876
877
880
883 }
884
885 break;
886 }
887 case Intrinsic::amdgcn_cvt_off_f32_i4: {
888 Value* Arg = II.getArgOperand(0);
890
893
896
898 if (!CArg)
899 break;
900
901
902 constexpr size_t ResValsSize = 16;
903 static constexpr float ResVals[ResValsSize] = {
904 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
905 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
907 ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
909 }
910 case Intrinsic::amdgcn_ubfe:
911 case Intrinsic::amdgcn_sbfe: {
912
913 Value *Src = II.getArgOperand(0);
916 }
917
918 unsigned Width;
920 unsigned IntSize = Ty->getIntegerBitWidth();
921
923 if (CWidth) {
925 if ((Width & (IntSize - 1)) == 0) {
927 }
928
929
930 if (Width >= IntSize) {
932 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
933 }
934 }
935
938 if (COffset) {
940 if (Offset >= IntSize) {
942 II, 1,
943 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
944 }
945 }
946
947 bool Signed = IID == Intrinsic::amdgcn_sbfe;
948
949 if (!CWidth || !COffset)
950 break;
951
952
953
954
956
957
958
959 if (Offset + Width < IntSize) {
963 RightShift->takeName(&II);
965 }
966
969
970 RightShift->takeName(&II);
972 }
973 case Intrinsic::amdgcn_exp:
974 case Intrinsic::amdgcn_exp_row:
975 case Intrinsic::amdgcn_exp_compr: {
978 if (EnBits == 0xf)
979 break;
980
981 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
983 for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
984 if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
985 (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
986 Value *Src = II.getArgOperand(I + 2);
990 }
991 }
992 }
993
995 return &II;
996 }
997
998 break;
999 }
1000 case Intrinsic::amdgcn_fmed3: {
1001 Value *Src0 = II.getArgOperand(0);
1002 Value *Src1 = II.getArgOperand(1);
1003 Value *Src2 = II.getArgOperand(2);
1004
1005 for (Value *Src : {Src0, Src1, Src2}) {
1008 }
1009
1010 if (II.isStrictFP())
1011 break;
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046 Value *V = nullptr;
1047 const APFloat *ConstSrc0 = nullptr;
1048 const APFloat *ConstSrc1 = nullptr;
1049 const APFloat *ConstSrc2 = nullptr;
1050
1054 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
1057
1058 if (ConstSrc0 && ConstSrc0->isNaN() && ConstSrc0->isSignaling())
1060
1063 break;
1067 break;
1069 break;
1070 }
1074 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
1077
1078 if (ConstSrc1 && ConstSrc1->isNaN() && ConstSrc1->isSignaling())
1080
1083 break;
1087 break;
1089 break;
1090 }
1096 if (ConstSrc2 && ConstSrc2->isNaN() && ConstSrc2->isSignaling()) {
1097 auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
1099 }
1100
1104 break;
1109 break;
1111 break;
1112 }
1113 }
1114
1115 if (V) {
1117 CI->copyFastMathFlags(&II);
1118 CI->takeName(&II);
1119 }
1121 }
1122
1123 bool Swap = false;
1124
1125
1126
1129 Swap = true;
1130 }
1131
1134 Swap = true;
1135 }
1136
1139 Swap = true;
1140 }
1141
1142 if (Swap) {
1143 II.setArgOperand(0, Src0);
1144 II.setArgOperand(1, Src1);
1145 II.setArgOperand(2, Src2);
1146 return &II;
1147 }
1148
1153 C2->getValueAPF());
1155 ConstantFP::get(II.getType(), Result));
1156 }
1157 }
1158 }
1159
1160 if (!ST->hasMed3_16())
1161 break;
1162
1163
1164
1169 IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
1170 return new FPExtInst(NewCall, II.getType());
1171 }
1172 }
1173 }
1174
1175 break;
1176 }
1177 case Intrinsic::amdgcn_icmp:
1178 case Intrinsic::amdgcn_fcmp: {
1180
1182 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1187 break;
1188
1189 Value *Src0 = II.getArgOperand(0);
1190 Value *Src1 = II.getArgOperand(1);
1191
1199 }
1200
1201
1202
1203
1204
1205
1210 II.getType(), Args);
1211 NewCall->addFnAttr(Attribute::Convergent);
1214 }
1215
1216
1219 II.setArgOperand(0, Src1);
1220 II.setArgOperand(1, Src0);
1221 II.setArgOperand(
1222 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
1223 return &II;
1224 }
1225
1227 break;
1228
1229
1230
1231
1232
1233
1244 return &II;
1245 }
1246
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1267
1269 ? Intrinsic::amdgcn_fcmp
1270 : Intrinsic::amdgcn_icmp;
1271
1274
1275 unsigned Width = CmpType->getBitWidth();
1276 unsigned NewWidth = Width;
1277
1278
1279 if (Width == 1)
1280 break;
1281
1282 if (Width <= 16)
1283 NewWidth = 16;
1284 else if (Width <= 32)
1285 NewWidth = 32;
1286 else if (Width <= 64)
1287 NewWidth = 64;
1288 else
1289 break;
1290
1291 if (Width != NewWidth) {
1296 } else {
1299 }
1300 }
1301 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1302 break;
1303
1304 Value *Args[] = {SrcLHS, SrcRHS,
1305 ConstantInt::get(CC->getType(), SrcPred)};
1307 NewIID, {II.getType(), SrcLHS->getType()}, Args);
1310 }
1311
1312 break;
1313 }
1314 case Intrinsic::amdgcn_mbcnt_hi: {
1315
1316 if (ST->isWave32())
1318 break;
1319 }
1320 case Intrinsic::amdgcn_ballot: {
1321 Value *Arg = II.getArgOperand(0);
1324
1326 if (Src->isZero()) {
1327
1329 }
1330 }
1331 if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
1332
1333
1334
1335
1338 {IC.Builder.getInt32Ty()},
1339 {II.getArgOperand(0)}),
1340 II.getType());
1343 }
1344 break;
1345 }
1346 case Intrinsic::amdgcn_wavefrontsize: {
1347 if (ST->isWaveSizeKnown())
1349 II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
1350 break;
1351 }
1352 case Intrinsic::amdgcn_wqm_vote: {
1353
1355 break;
1356
1358 }
1359 case Intrinsic::amdgcn_kill: {
1361 if ( ||
->getZExtValue())
1362 break;
1363
1364
1366 }
1367 case Intrinsic::amdgcn_update_dpp: {
1368 Value *Old = II.getArgOperand(0);
1369
1373 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1375 break;
1376
1377
1379 }
1380 case Intrinsic::amdgcn_permlane16:
1381 case Intrinsic::amdgcn_permlane16_var:
1382 case Intrinsic::amdgcn_permlanex16:
1383 case Intrinsic::amdgcn_permlanex16_var: {
1384
1385 Value *VDstIn = II.getArgOperand(0);
1387 break;
1388
1389
1390 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1391 IID == Intrinsic::amdgcn_permlanex16)
1392 ? 4
1393 : 3;
1394
1395
1396
1397
1398 unsigned int BcIdx = FiIdx + 1;
1399
1403 break;
1404
1406 }
1407 case Intrinsic::amdgcn_permlane64:
1408 case Intrinsic::amdgcn_readfirstlane:
1409 case Intrinsic::amdgcn_readlane:
1410 case Intrinsic::amdgcn_ds_bpermute: {
1411
1412 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1413 const Use &Src = II.getArgOperandUse(SrcIdx);
1416
1417 if (IID == Intrinsic::amdgcn_readlane &&
1419 return &II;
1420
1421
1422
1423
1424 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1425 const Use &Lane = II.getArgOperandUse(0);
1429 II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
1430 II.setCalledFunction(NewDecl);
1431 II.setOperand(0, Src);
1432 II.setOperand(1, NewLane);
1433 return &II;
1434 }
1435 }
1436
1437 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1439 return Res;
1440 }
1441
1442 return std::nullopt;
1443 }
1444 case Intrinsic::amdgcn_writelane: {
1445
1447 return &II;
1448 return std::nullopt;
1449 }
1450 case Intrinsic::amdgcn_trig_preop: {
1451
1452
1453 if (.getType()->isDoubleTy())
1454 break;
1455
1456 Value *Src = II.getArgOperand(0);
1457 Value *Segment = II.getArgOperand(1);
1460
1462 auto *QNaN = ConstantFP::get(
1465 }
1466
1468 if (!Csrc)
1469 break;
1470
1471 if (II.isStrictFP())
1472 break;
1473
1475 if (Fsrc.isNaN()) {
1476 auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
1478 }
1479
1481 if (!Cseg)
1482 break;
1483
1486 unsigned Shift = SegmentVal * 53;
1489
1490
1491 static const uint32_t TwoByPi[] = {
1492 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1493 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1494 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1495 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1496 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1497 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1498 0x56033046};
1499
1500
1501 unsigned Idx = Shift >> 5;
1502 if (Idx + 2 >= std::size(TwoByPi)) {
1505 }
1506
1507 unsigned BShift = Shift & 0x1f;
1510 if (BShift)
1511 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1512 Thi = Thi >> 11;
1514
1515 int Scale = -53 - Shift;
1517 Scale += 128;
1518
1521 }
1522 case Intrinsic::amdgcn_fmul_legacy: {
1523 Value *Op0 = II.getArgOperand(0);
1524 Value *Op1 = II.getArgOperand(1);
1525
1526 for (Value *Src : {Op0, Op1}) {
1529 }
1530
1531
1532
1533
1537
1538
1539
1544 }
1545 break;
1546 }
1547 case Intrinsic::amdgcn_fma_legacy: {
1548 Value *Op0 = II.getArgOperand(0);
1549 Value *Op1 = II.getArgOperand(1);
1550 Value *Op2 = II.getArgOperand(2);
1551
1552 for (Value *Src : {Op0, Op1, Op2}) {
1555 }
1556
1557
1558
1559
1562
1563
1568 }
1569
1570
1571
1574 II.getModule(), Intrinsic::fma, II.getType()));
1575 return &II;
1576 }
1577 break;
1578 }
1579 case Intrinsic::amdgcn_is_shared:
1580 case Intrinsic::amdgcn_is_private: {
1581 Value *Src = II.getArgOperand(0);
1586
1589 break;
1590 }
1591 case Intrinsic::amdgcn_make_buffer_rsrc: {
1592 Value *Src = II.getArgOperand(0);
1595 return std::nullopt;
1596 }
1597 case Intrinsic::amdgcn_raw_buffer_store_format:
1598 case Intrinsic::amdgcn_struct_buffer_store_format:
1599 case Intrinsic::amdgcn_raw_tbuffer_store:
1600 case Intrinsic::amdgcn_struct_tbuffer_store:
1601 case Intrinsic::amdgcn_image_store_1d:
1602 case Intrinsic::amdgcn_image_store_1darray:
1603 case Intrinsic::amdgcn_image_store_2d:
1604 case Intrinsic::amdgcn_image_store_2darray:
1605 case Intrinsic::amdgcn_image_store_2darraymsaa:
1606 case Intrinsic::amdgcn_image_store_2dmsaa:
1607 case Intrinsic::amdgcn_image_store_3d:
1608 case Intrinsic::amdgcn_image_store_cube:
1609 case Intrinsic::amdgcn_image_store_mip_1d:
1610 case Intrinsic::amdgcn_image_store_mip_1darray:
1611 case Intrinsic::amdgcn_image_store_mip_2d:
1612 case Intrinsic::amdgcn_image_store_mip_2darray:
1613 case Intrinsic::amdgcn_image_store_mip_3d:
1614 case Intrinsic::amdgcn_image_store_mip_cube: {
1616 break;
1617
1618 APInt DemandedElts;
1619 if (ST->hasDefaultComponentBroadcast())
1621 else if (ST->hasDefaultComponentZero())
1623 else
1624 break;
1625
1626 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1628 false)) {
1630 }
1631
1632 break;
1633 }
1634 case Intrinsic::amdgcn_prng_b32: {
1635 auto *Src = II.getArgOperand(0);
1638 }
1639 return std::nullopt;
1640 }
1641 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1642 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1643 Value *Src0 = II.getArgOperand(0);
1644 Value *Src1 = II.getArgOperand(1);
1649
1650 auto getFormatNumRegs = [](unsigned FormatVal) {
1651 switch (FormatVal) {
1654 return 6u;
1656 return 4u;
1659 return 8u;
1660 default:
1662 }
1663 };
1664
1665 bool MadeChange = false;
1666 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1667 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1668
1669
1670
1671 if (Src0Ty->getNumElements() > Src0NumElts) {
1675 MadeChange = true;
1676 }
1677
1678 if (Src1Ty->getNumElements() > Src1NumElts) {
1682 MadeChange = true;
1683 }
1684
1685 if (!MadeChange)
1686 return std::nullopt;
1687
1689 Args[0] = Src0;
1690 Args[1] = Src1;
1691
1696 }
1697 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1698 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1699 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1700 Value *Src0 = II.getArgOperand(1);
1701 Value *Src1 = II.getArgOperand(3);
1706
1707 bool MadeChange = false;
1710
1711
1712
1713 if (Src0Ty->getNumElements() > Src0NumElts) {
1717 MadeChange = true;
1718 }
1719
1720 if (Src1Ty->getNumElements() > Src1NumElts) {
1724 MadeChange = true;
1725 }
1726
1727 if (!MadeChange)
1728 return std::nullopt;
1729
1731 Args[1] = Src0;
1732 Args[3] = Src1;
1733
1735 IID, {II.getArgOperand(5)->getType(), Src0->getType(), Src1->getType()},
1736 Args, &II);
1739 }
1740 }
1744 }
1745 return std::nullopt;
1746}
1747
1748
1749
1750
1751
1752
1753
1754
1757 APInt DemandedElts,
1758 int DMaskIdx, bool IsLoad) {
1759
1761 : II.getOperand(0)->getType());
1762 unsigned VWidth = IIVTy->getNumElements();
1763 if (VWidth == 1)
1764 return nullptr;
1765 Type *EltTy = IIVTy->getElementType();
1766
1769
1770
1772
1773 if (DMaskIdx < 0) {
1774
1775
1776 const unsigned ActiveBits = DemandedElts.getActiveBits();
1777 const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1778
1779
1780
1781
1782 DemandedElts = (1 << ActiveBits) - 1;
1783
1784 if (UnusedComponentsAtFront > 0) {
1785 static const unsigned InvalidOffsetIdx = 0xf;
1786
1787 unsigned OffsetIdx;
1788 switch (II.getIntrinsicID()) {
1789 case Intrinsic::amdgcn_raw_buffer_load:
1790 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1791 OffsetIdx = 1;
1792 break;
1793 case Intrinsic::amdgcn_s_buffer_load:
1794
1795
1796
1797 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1798 OffsetIdx = InvalidOffsetIdx;
1799 else
1800 OffsetIdx = 1;
1801 break;
1802 case Intrinsic::amdgcn_struct_buffer_load:
1803 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1804 OffsetIdx = 2;
1805 break;
1806 default:
1807
1808 OffsetIdx = InvalidOffsetIdx;
1809 break;
1810 }
1811
1812 if (OffsetIdx != InvalidOffsetIdx) {
1813
1814 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1815 auto *Offset = Args[OffsetIdx];
1816 unsigned SingleComponentSizeInBits =
1818 unsigned OffsetAdd =
1819 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1820 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1822 }
1823 }
1824 } else {
1825
1826
1828 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1829
1830
1831 if (DMaskVal == 0)
1832 return nullptr;
1833
1834
1835 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1836
1837 unsigned NewDMaskVal = 0;
1838 unsigned OrigLdStIdx = 0;
1839 for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1840 const unsigned Bit = 1 << SrcIdx;
1841 if (!!(DMaskVal & Bit)) {
1842 if (!!DemandedElts[OrigLdStIdx])
1843 NewDMaskVal |= Bit;
1844 OrigLdStIdx++;
1845 }
1846 }
1847
1848 if (DMaskVal != NewDMaskVal)
1849 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1850 }
1851
1852 unsigned NewNumElts = DemandedElts.popcount();
1853 if (!NewNumElts)
1855
1856 if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1857 if (DMaskIdx >= 0)
1858 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1859 return nullptr;
1860 }
1861
1862
1863
1866 return nullptr;
1867
1868 Type *NewTy =
1870 OverloadTys[0] = NewTy;
1871
1872 if (!IsLoad) {
1874 for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1875 if (DemandedElts[OrigStoreIdx])
1876 EltMask.push_back(OrigStoreIdx);
1877
1878 if (NewNumElts == 1)
1880 else
1882 }
1883
1888
1889 if (IsLoad) {
1890 if (NewNumElts == 1) {
1893 }
1894
1896 unsigned NewLoadIdx = 0;
1897 for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1898 if (!!DemandedElts[OrigLoadIdx])
1899 EltMask.push_back(NewLoadIdx++);
1900 else
1902 }
1903
1905
1906 return Shuffle;
1907 }
1908
1909 return NewCall;
1910}
1911
1914 APInt &UndefElts) const {
1916 if (!VT)
1917 return nullptr;
1918
1919 const unsigned FirstElt = DemandedElts.countr_zero();
1920 const unsigned LastElt = DemandedElts.getActiveBits() - 1;
1921 const unsigned MaskLen = LastElt - FirstElt + 1;
1922
1923 unsigned OldNumElts = VT->getNumElements();
1924 if (MaskLen == OldNumElts && MaskLen != 1)
1925 return nullptr;
1926
1927 Type *EltTy = VT->getElementType();
1929
1930
1931
1933 return nullptr;
1934
1935 Value *Src = II.getArgOperand(0);
1936
1937
1938
1940 II.getOperandBundlesAsDefs(OpBundles);
1941
1945
1946 if (MaskLen == 1) {
1948
1949
1951
1953 NewCall, FirstElt);
1954 }
1955
1957 for (unsigned I = 0; I != MaskLen; ++I) {
1958 if (DemandedElts[FirstElt + I])
1959 ExtractMask[I] = FirstElt + I;
1960 }
1961
1963
1964
1966
1968 for (unsigned I = 0; I != MaskLen; ++I) {
1969 if (DemandedElts[FirstElt + I])
1970 InsertMask[FirstElt + I] = I;
1971 }
1972
1973
1974
1976}
1977
1980 APInt &UndefElts2, APInt &UndefElts3,
1982 SimplifyAndSetOp) const {
1983 switch (II.getIntrinsicID()) {
1984 case Intrinsic::amdgcn_readfirstlane:
1985 SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1987 case Intrinsic::amdgcn_raw_buffer_load:
1988 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1989 case Intrinsic::amdgcn_raw_buffer_load_format:
1990 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1991 case Intrinsic::amdgcn_raw_tbuffer_load:
1992 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1993 case Intrinsic::amdgcn_s_buffer_load:
1994 case Intrinsic::amdgcn_struct_buffer_load:
1995 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1996 case Intrinsic::amdgcn_struct_buffer_load_format:
1997 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1998 case Intrinsic::amdgcn_struct_tbuffer_load:
1999 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2001 default: {
2002 if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
2004 }
2005 break;
2006 }
2007 }
2008 return std::nullopt;
2009}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
Definition AMDGPUInstCombineIntrinsic.cpp:492
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
Definition AMDGPUInstCombineIntrinsic.cpp:499
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
Definition AMDGPUInstCombineIntrinsic.cpp:546
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition AMDGPUInstCombineIntrinsic.cpp:105
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
Definition AMDGPUInstCombineIntrinsic.cpp:432
static APInt defaultComponentBroadcast(Value *V)
Definition AMDGPUInstCombineIntrinsic.cpp:458
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with the modified arguments (based on OldIntr), and replaces InstToReplace with the newly created intrinsic call.
Definition AMDGPUInstCombineIntrinsic.cpp:120
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition AMDGPUInstCombineIntrinsic.cpp:46
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition AMDGPUInstCombineIntrinsic.cpp:1755
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition AMDGPUInstCombineIntrinsic.cpp:154
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition AMDGPUInstCombineIntrinsic.cpp:67
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Definition AMDGPUInstCombineIntrinsic.cpp:414
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
This file provides a TargetTransformInfoImplBase conforming object specific to the AMDGPU target machine.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition AMDGPUInstCombineIntrinsic.cpp:518
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AMDGPUInstCombineIntrinsic.cpp:639
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
Definition AMDGPUInstCombineIntrinsic.cpp:557
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AMDGPUInstCombineIntrinsic.cpp:1978
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
Definition AMDGPUInstCombineIntrinsic.cpp:1912
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition AMDGPUInstCombineIntrinsic.cpp:391
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.