LLVM: lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
22#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include
25
26using namespace llvm;
28
29#define DEBUG_TYPE "AMDGPUtti"
30
31namespace {
32
33struct AMDGPUImageDMaskIntrinsic {
34 unsigned Intr;
35};
36
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
39
40}
41
42
43
44
45
49
53 return maxnum(Src1, Src2);
54
58 return maxnum(Src0, Src2);
59
60 return maxnum(Src0, Src1);
61}
62
63
64
65
66
68 Type *VTy = V.getType();
70
71 return false;
72 }
73 if (IsFloat) {
75
76
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo = true;
80 &LosesInfo);
81 return !LosesInfo;
82 }
83 } else {
85
86
87 APInt IntValue(ConstInt->getValue());
89 }
90 }
91
95 if (IsExt) {
98 return true;
99 }
100
101 return false;
102}
103
104
106 Type *VTy = V.getType();
110 return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
112 return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
113
115}
116
117
118
119
124 Func) {
127 return std::nullopt;
128
130
131
132 Func(Args, ArgTys);
133
139
140
143
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
145
147 if (RemoveOldIntr)
149
150 return RetValue;
151}
152
153static std::optional<Instruction *>
157
158 if (const auto *LZMappingInfo =
160 if (auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
165 ImageDimIntr->Dim);
167 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
169 });
170 }
171 }
172 }
173
174
175 if (const auto *MIPMappingInfo =
177 if (auto *ConstantMip =
179 if (ConstantMip->isZero()) {
182 ImageDimIntr->Dim);
184 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
186 });
187 }
188 }
189 }
190
191
192 if (const auto *BiasMappingInfo =
194 if (auto *ConstantBias =
196 if (ConstantBias->isZero()) {
199 ImageDimIntr->Dim);
201 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
204 });
205 }
206 }
207 }
208
209
210 if (const auto *OffsetMappingInfo =
212 if (auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
219 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
221 });
222 }
223 }
224 }
225
226
227 if (ST->hasD16Images()) {
228
231
232 if (BaseOpcode->HasD16) {
233
234
235
236
237 if (II.hasOneUse()) {
239
240 if (User->getOpcode() == Instruction::FPTrunc &&
242
244 [&](auto &Args, auto &ArgTys) {
245
246
247 ArgTys[0] = User->getType();
248 });
249 }
250 }
251
252
253
255 ExtractTruncPairs;
256 bool AllHalfExtracts = true;
257
258 for (User *U : II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts = false;
262 break;
263 }
264
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts = false;
268 break;
269 }
270
272 }
273
274 if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
276 Type *HalfVecTy =
278
279
280
283 SigTys[0] = HalfVecTy;
284
288
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
291
293 for (auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
295
296 Builder.SetInsertPoint(Tr);
297
298 Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
300
301 Tr->replaceAllUsesWith(HalfExtract);
302 }
303
304 for (auto &[Ext, Tr] : ExtractTruncPairs) {
307 }
308
309 return &II;
310 }
311 }
312 }
313
314
315 if (!ST->hasA16() && !ST->hasG16())
316 return std::nullopt;
317
318
319
320 bool HasSampler =
322 bool FloatCoord = false;
323
324 bool OnlyDerivatives = false;
325
326 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord = II.getOperand(OperandIndex);
329
331 if (OperandIndex < ImageDimIntr->CoordStart ||
333 return std::nullopt;
334 }
335
336 OnlyDerivatives = true;
337 break;
338 }
339
343 }
344
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives = true;
347
348
349 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives = true;
355 }
356
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
359 return std::nullopt;
360
363
365 II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
369
370
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
373 }
374
375 unsigned EndIndex =
378 OperandIndex < EndIndex; OperandIndex++) {
379 Args[OperandIndex] =
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
381 }
382
383
384 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
387 }
388 });
389}
390
394
395
396
397
398
401
402 return true;
403 }
404
407
408 return true;
409 }
410 return false;
411}
412
413
415 Value *Src = nullptr;
418 if (Src->getType()->isHalfTy())
419 return Src;
421 bool LosesInfo;
424 if (!LosesInfo)
426 }
427 return nullptr;
428}
429
430
431
435 unsigned VWidth = VTy->getNumElements();
437
438 for (int i = VWidth - 1; i > 0; --i) {
440 if (!Elt)
441 break;
442
445 break;
446 } else {
447 break;
448 }
449
451 }
452
453 return DemandedElts;
454}
455
456
457
460 unsigned VWidth = VTy->getNumElements();
463
466 SVI->getShuffleMask(ShuffleMask);
467
468 for (int I = VWidth - 1; I > 0; --I) {
469 if (ShuffleMask.empty()) {
471 if (!Elt || (Elt != FirstComponent && (Elt)))
472 break;
473 } else {
474
475
476 if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
477 break;
478 }
480 }
481
482 return DemandedElts;
483}
484
487 APInt DemandedElts,
488 int DMaskIdx = -1,
489 bool IsLoad = true);
490
491
497
498
500 Value *V = U.get();
502 return true;
507 return false;
508
509
511 }
512 return false;
513}
514
515
516
517
520 unsigned LaneArgIdx) const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
523
526 return true;
527
529 return false;
530
531
532
533
534
535 Value *LaneArg = II.getArgOperand(LaneArgIdx);
537 ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
540 return true;
541 }
542
543 return false;
544}
545
550
551 CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
553 return NewCall;
554}
555
559 const auto IID = II.getIntrinsicID();
560 assert(IID == Intrinsic::amdgcn_readlane ||
561 IID == Intrinsic::amdgcn_readfirstlane ||
562 IID == Intrinsic::amdgcn_permlane64);
563
565
566
567
568
570 return nullptr;
571
572 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
573
574
575
576 Value *LaneID = nullptr;
577 if (IsReadLane) {
578 LaneID = II.getOperand(1);
579
580
581
582
585 return nullptr;
586 }
587 }
588
589
590
591
592 const auto DoIt = [&](unsigned OpIdx,
595 if (IsReadLane)
596 Ops.push_back(LaneID);
597
598
600
601
604 return &NewOp;
605 };
606
607
608 if (IID == Intrinsic::amdgcn_permlane64 && (OpInst))
609 return nullptr;
610
612 return DoIt(0, II.getCalledFunction());
613
616 Type *SrcTy = Src->getType();
618 return nullptr;
619
622 return DoIt(0, Remangled);
623 }
624
625
627
628
630 return DoIt(1, II.getCalledFunction());
632 return DoIt(0, II.getCalledFunction());
633 }
634
635 return nullptr;
636}
637
638std::optional<Instruction *>
641 switch (IID) {
642 case Intrinsic::amdgcn_rcp: {
643 Value *Src = II.getArgOperand(0);
646
647
650 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
652 }
653
654 if (II.isStrictFP())
655 break;
656
658 const APFloat &ArgVal = C->getValueAPF();
661
662
663
664
665
667 }
668
671 break;
673 if (!SrcCI)
674 break;
675
676 auto IID = SrcCI->getIntrinsicID();
677
678
679
680
681 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
684 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
685 break;
686
688 break;
689
691 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
692
693 InnerFMF |= FMF;
694 II.setFastMathFlags(InnerFMF);
695
696 II.setCalledFunction(NewDecl);
698 }
699
700 break;
701 }
702 case Intrinsic::amdgcn_sqrt:
703 case Intrinsic::amdgcn_rsq:
704 case Intrinsic::amdgcn_tanh: {
705 Value *Src = II.getArgOperand(0);
708
709
712 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
714 }
715
716
717 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
719 II.getModule(), Intrinsic::sqrt, {II.getType()});
720 II.setCalledFunction(NewDecl);
721 return &II;
722 }
723
724 break;
725 }
726 case Intrinsic::amdgcn_log:
727 case Intrinsic::amdgcn_exp2: {
728 const bool IsLog = IID == Intrinsic::amdgcn_log;
729 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
730 Value *Src = II.getArgOperand(0);
732
735
738
740 if (C->isInfinity()) {
741
742
743 if (->isNegative())
745
746
747 if (IsExp && C->isNegative())
749 }
750
751 if (II.isStrictFP())
752 break;
753
754 if (C->isNaN()) {
755 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
757 }
758
759
760 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
762 : ConstantFP::get(Ty, 1.0);
764 }
765
766 if (IsLog && C->isNegative())
768
769
770 }
771
772 break;
773 }
774 case Intrinsic::amdgcn_frexp_mant:
775 case Intrinsic::amdgcn_frexp_exp: {
776 Value *Src = II.getArgOperand(0);
778 int Exp;
781
782 if (IID == Intrinsic::amdgcn_frexp_mant) {
784 II, ConstantFP::get(II.getContext(), Significand));
785 }
786
787
789 Exp = 0;
790
793 }
794
797
800 }
801
802 break;
803 }
804 case Intrinsic::amdgcn_class: {
805 Value *Src0 = II.getArgOperand(0);
806 Value *Src1 = II.getArgOperand(1);
808 if (CMask) {
810 II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
811
812
813 II.setArgOperand(1, ConstantInt::get(Src1->getType(),
815 return &II;
816 }
817
818
821
822
825
826
831 }
832 break;
833 }
834 case Intrinsic::amdgcn_cvt_pkrtz: {
835 auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
837
842
845 bool LosesInfo;
848 return ConstantFP::get(HalfTy, Val);
849 }
850
851 Value *Src = nullptr;
853 if (Src->getType()->isHalfTy())
854 return Src;
855 }
856
857 return nullptr;
858 };
859
860 if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
861 if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
866 }
867 }
868
869 break;
870 }
871 case Intrinsic::amdgcn_cvt_pknorm_i16:
872 case Intrinsic::amdgcn_cvt_pknorm_u16:
873 case Intrinsic::amdgcn_cvt_pk_i16:
874 case Intrinsic::amdgcn_cvt_pk_u16: {
875 Value *Src0 = II.getArgOperand(0);
876 Value *Src1 = II.getArgOperand(1);
877
878
881
884 }
885
886 break;
887 }
888 case Intrinsic::amdgcn_cvt_off_f32_i4: {
889 Value* Arg = II.getArgOperand(0);
891
894
897
899 if (!CArg)
900 break;
901
902
903 constexpr size_t ResValsSize = 16;
904 static constexpr float ResVals[ResValsSize] = {
905 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
906 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
908 ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
910 }
911 case Intrinsic::amdgcn_ubfe:
912 case Intrinsic::amdgcn_sbfe: {
913
914 Value *Src = II.getArgOperand(0);
917 }
918
919 unsigned Width;
921 unsigned IntSize = Ty->getIntegerBitWidth();
922
924 if (CWidth) {
926 if ((Width & (IntSize - 1)) == 0) {
928 }
929
930
931 if (Width >= IntSize) {
933 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
934 }
935 }
936
939 if (COffset) {
941 if (Offset >= IntSize) {
943 II, 1,
944 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
945 }
946 }
947
948 bool Signed = IID == Intrinsic::amdgcn_sbfe;
949
950 if (!CWidth || !COffset)
951 break;
952
953
954
955
957
958
959
960 if (Offset + Width < IntSize) {
964 RightShift->takeName(&II);
966 }
967
970
971 RightShift->takeName(&II);
973 }
974 case Intrinsic::amdgcn_exp:
975 case Intrinsic::amdgcn_exp_row:
976 case Intrinsic::amdgcn_exp_compr: {
979 if (EnBits == 0xf)
980 break;
981
982 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
984 for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
985 if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
986 (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
987 Value *Src = II.getArgOperand(I + 2);
991 }
992 }
993 }
994
996 return &II;
997 }
998
999 break;
1000 }
1001 case Intrinsic::amdgcn_fmed3: {
1002 Value *Src0 = II.getArgOperand(0);
1003 Value *Src1 = II.getArgOperand(1);
1004 Value *Src2 = II.getArgOperand(2);
1005
1006 for (Value *Src : {Src0, Src1, Src2}) {
1009 }
1010
1011 if (II.isStrictFP())
1012 break;
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 Value *V = nullptr;
1048 const APFloat *ConstSrc0 = nullptr;
1049 const APFloat *ConstSrc1 = nullptr;
1050 const APFloat *ConstSrc2 = nullptr;
1051
1055 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
1058
1059 if (ConstSrc0 && ConstSrc0->isNaN() && ConstSrc0->isSignaling())
1061
1064 break;
1068 break;
1070 break;
1071 }
1075 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
1078
1079 if (ConstSrc1 && ConstSrc1->isNaN() && ConstSrc1->isSignaling())
1081
1084 break;
1088 break;
1090 break;
1091 }
1097 if (ConstSrc2 && ConstSrc2->isNaN() && ConstSrc2->isSignaling()) {
1098 auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
1100 }
1101
1105 break;
1110 break;
1112 break;
1113 }
1114 }
1115
1116 if (V) {
1118 CI->copyFastMathFlags(&II);
1119 CI->takeName(&II);
1120 }
1122 }
1123
1124 bool Swap = false;
1125
1126
1127
1130 Swap = true;
1131 }
1132
1135 Swap = true;
1136 }
1137
1140 Swap = true;
1141 }
1142
1143 if (Swap) {
1144 II.setArgOperand(0, Src0);
1145 II.setArgOperand(1, Src1);
1146 II.setArgOperand(2, Src2);
1147 return &II;
1148 }
1149
1154 C2->getValueAPF());
1156 ConstantFP::get(II.getType(), Result));
1157 }
1158 }
1159 }
1160
1161 if (!ST->hasMed3_16())
1162 break;
1163
1164
1165
1170 IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
1171 return new FPExtInst(NewCall, II.getType());
1172 }
1173 }
1174 }
1175
1176 break;
1177 }
1178 case Intrinsic::amdgcn_icmp:
1179 case Intrinsic::amdgcn_fcmp: {
1181
1183 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1188 break;
1189
1190 Value *Src0 = II.getArgOperand(0);
1191 Value *Src1 = II.getArgOperand(1);
1192
1200 }
1201
1202
1203
1204
1205
1206
1211 II.getType(), Args);
1212 NewCall->addFnAttr(Attribute::Convergent);
1215 }
1216
1217
1220 II.setArgOperand(0, Src1);
1221 II.setArgOperand(1, Src0);
1222 II.setArgOperand(
1223 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
1224 return &II;
1225 }
1226
1228 break;
1229
1230
1231
1232
1233
1234
1245 return &II;
1246 }
1247
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1268
1270 ? Intrinsic::amdgcn_fcmp
1271 : Intrinsic::amdgcn_icmp;
1272
1275
1276 unsigned Width = CmpType->getBitWidth();
1277 unsigned NewWidth = Width;
1278
1279
1280 if (Width == 1)
1281 break;
1282
1283 if (Width <= 16)
1284 NewWidth = 16;
1285 else if (Width <= 32)
1286 NewWidth = 32;
1287 else if (Width <= 64)
1288 NewWidth = 64;
1289 else
1290 break;
1291
1292 if (Width != NewWidth) {
1297 } else {
1300 }
1301 }
1302 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1303 break;
1304
1305 Value *Args[] = {SrcLHS, SrcRHS,
1306 ConstantInt::get(CC->getType(), SrcPred)};
1308 NewIID, {II.getType(), SrcLHS->getType()}, Args);
1311 }
1312
1313 break;
1314 }
1315 case Intrinsic::amdgcn_mbcnt_hi: {
1316
1317 if (ST->isWave32())
1319 break;
1320 }
1321 case Intrinsic::amdgcn_ballot: {
1322 Value *Arg = II.getArgOperand(0);
1325
1327 if (Src->isZero()) {
1328
1330 }
1331 }
1332 if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
1333
1334
1335
1336
1339 {IC.Builder.getInt32Ty()},
1340 {II.getArgOperand(0)}),
1341 II.getType());
1344 }
1345 break;
1346 }
1347 case Intrinsic::amdgcn_wavefrontsize: {
1348 if (ST->isWaveSizeKnown())
1350 II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
1351 break;
1352 }
1353 case Intrinsic::amdgcn_wqm_vote: {
1354
1356 break;
1357
1359 }
1360 case Intrinsic::amdgcn_kill: {
1362 if ( ||
->getZExtValue())
1363 break;
1364
1365
1367 }
1368 case Intrinsic::amdgcn_update_dpp: {
1369 Value *Old = II.getArgOperand(0);
1370
1374 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1376 break;
1377
1378
1380 }
1381 case Intrinsic::amdgcn_permlane16:
1382 case Intrinsic::amdgcn_permlane16_var:
1383 case Intrinsic::amdgcn_permlanex16:
1384 case Intrinsic::amdgcn_permlanex16_var: {
1385
1386 Value *VDstIn = II.getArgOperand(0);
1388 break;
1389
1390
1391 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1392 IID == Intrinsic::amdgcn_permlanex16)
1393 ? 4
1394 : 3;
1395
1396
1397
1398
1399 unsigned int BcIdx = FiIdx + 1;
1400
1404 break;
1405
1407 }
1408 case Intrinsic::amdgcn_permlane64:
1409 case Intrinsic::amdgcn_readfirstlane:
1410 case Intrinsic::amdgcn_readlane:
1411 case Intrinsic::amdgcn_ds_bpermute: {
1412
1413 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1414 const Use &Src = II.getArgOperandUse(SrcIdx);
1417
1418 if (IID == Intrinsic::amdgcn_readlane &&
1420 return &II;
1421
1422
1423
1424
1425 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1426 const Use &Lane = II.getArgOperandUse(0);
1430 II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
1431 II.setCalledFunction(NewDecl);
1432 II.setOperand(0, Src);
1433 II.setOperand(1, NewLane);
1434 return &II;
1435 }
1436 }
1437
1438 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1440 return Res;
1441 }
1442
1443 return std::nullopt;
1444 }
1445 case Intrinsic::amdgcn_writelane: {
1446
1448 return &II;
1449 return std::nullopt;
1450 }
1451 case Intrinsic::amdgcn_trig_preop: {
1452
1453
1454 if (.getType()->isDoubleTy())
1455 break;
1456
1457 Value *Src = II.getArgOperand(0);
1458 Value *Segment = II.getArgOperand(1);
1461
1463 auto *QNaN = ConstantFP::get(
1466 }
1467
1469 if (!Csrc)
1470 break;
1471
1472 if (II.isStrictFP())
1473 break;
1474
1476 if (Fsrc.isNaN()) {
1477 auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
1479 }
1480
1482 if (!Cseg)
1483 break;
1484
1487 unsigned Shift = SegmentVal * 53;
1490
1491
1492 static const uint32_t TwoByPi[] = {
1493 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1494 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1495 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1496 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1497 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1498 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1499 0x56033046};
1500
1501
1502 unsigned Idx = Shift >> 5;
1503 if (Idx + 2 >= std::size(TwoByPi)) {
1506 }
1507
1508 unsigned BShift = Shift & 0x1f;
1511 if (BShift)
1512 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1513 Thi = Thi >> 11;
1515
1516 int Scale = -53 - Shift;
1518 Scale += 128;
1519
1522 }
1523 case Intrinsic::amdgcn_fmul_legacy: {
1524 Value *Op0 = II.getArgOperand(0);
1525 Value *Op1 = II.getArgOperand(1);
1526
1527 for (Value *Src : {Op0, Op1}) {
1530 }
1531
1532
1533
1534
1538
1539
1540
1545 }
1546 break;
1547 }
1548 case Intrinsic::amdgcn_fma_legacy: {
1549 Value *Op0 = II.getArgOperand(0);
1550 Value *Op1 = II.getArgOperand(1);
1551 Value *Op2 = II.getArgOperand(2);
1552
1553 for (Value *Src : {Op0, Op1, Op2}) {
1556 }
1557
1558
1559
1560
1563
1564
1569 }
1570
1571
1572
1575 II.getModule(), Intrinsic::fma, II.getType()));
1576 return &II;
1577 }
1578 break;
1579 }
1580 case Intrinsic::amdgcn_is_shared:
1581 case Intrinsic::amdgcn_is_private: {
1582 Value *Src = II.getArgOperand(0);
1587
1590 break;
1591 }
1592 case Intrinsic::amdgcn_make_buffer_rsrc: {
1593 Value *Src = II.getArgOperand(0);
1596 return std::nullopt;
1597 }
1598 case Intrinsic::amdgcn_raw_buffer_store_format:
1599 case Intrinsic::amdgcn_struct_buffer_store_format:
1600 case Intrinsic::amdgcn_raw_tbuffer_store:
1601 case Intrinsic::amdgcn_struct_tbuffer_store:
1602 case Intrinsic::amdgcn_image_store_1d:
1603 case Intrinsic::amdgcn_image_store_1darray:
1604 case Intrinsic::amdgcn_image_store_2d:
1605 case Intrinsic::amdgcn_image_store_2darray:
1606 case Intrinsic::amdgcn_image_store_2darraymsaa:
1607 case Intrinsic::amdgcn_image_store_2dmsaa:
1608 case Intrinsic::amdgcn_image_store_3d:
1609 case Intrinsic::amdgcn_image_store_cube:
1610 case Intrinsic::amdgcn_image_store_mip_1d:
1611 case Intrinsic::amdgcn_image_store_mip_1darray:
1612 case Intrinsic::amdgcn_image_store_mip_2d:
1613 case Intrinsic::amdgcn_image_store_mip_2darray:
1614 case Intrinsic::amdgcn_image_store_mip_3d:
1615 case Intrinsic::amdgcn_image_store_mip_cube: {
1617 break;
1618
1619 APInt DemandedElts;
1620 if (ST->hasDefaultComponentBroadcast())
1622 else if (ST->hasDefaultComponentZero())
1624 else
1625 break;
1626
1627 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1629 false)) {
1631 }
1632
1633 break;
1634 }
1635 case Intrinsic::amdgcn_prng_b32: {
1636 auto *Src = II.getArgOperand(0);
1639 }
1640 return std::nullopt;
1641 }
1642 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1643 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1644 Value *Src0 = II.getArgOperand(0);
1645 Value *Src1 = II.getArgOperand(1);
1650
1651 auto getFormatNumRegs = [](unsigned FormatVal) {
1652 switch (FormatVal) {
1655 return 6u;
1657 return 4u;
1660 return 8u;
1661 default:
1663 }
1664 };
1665
1666 bool MadeChange = false;
1667 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1668 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1669
1670
1671
1672 if (Src0Ty->getNumElements() > Src0NumElts) {
1676 MadeChange = true;
1677 }
1678
1679 if (Src1Ty->getNumElements() > Src1NumElts) {
1683 MadeChange = true;
1684 }
1685
1686 if (!MadeChange)
1687 return std::nullopt;
1688
1690 Args[0] = Src0;
1691 Args[1] = Src1;
1692
1697 }
1698 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1699 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1700 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1701 Value *Src0 = II.getArgOperand(1);
1702 Value *Src1 = II.getArgOperand(3);
1707
1708 bool MadeChange = false;
1711
1712
1713
1714 if (Src0Ty->getNumElements() > Src0NumElts) {
1718 MadeChange = true;
1719 }
1720
1721 if (Src1Ty->getNumElements() > Src1NumElts) {
1725 MadeChange = true;
1726 }
1727
1728 if (!MadeChange)
1729 return std::nullopt;
1730
1732 Args[1] = Src0;
1733 Args[3] = Src1;
1734
1736 IID, {II.getArgOperand(5)->getType(), Src0->getType(), Src1->getType()},
1737 Args, &II);
1740 }
1741 }
1745 }
1746 return std::nullopt;
1747}
1748
1749
1750
1751
1752
1753
1754
1755
1758 APInt DemandedElts,
1759 int DMaskIdx, bool IsLoad) {
1760
1762 : II.getOperand(0)->getType());
1763 unsigned VWidth = IIVTy->getNumElements();
1764 if (VWidth == 1)
1765 return nullptr;
1766 Type *EltTy = IIVTy->getElementType();
1767
1770
1771
1773
1774 if (DMaskIdx < 0) {
1775
1776
1777 const unsigned ActiveBits = DemandedElts.getActiveBits();
1778 const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1779
1780
1781
1782
1783 DemandedElts = (1 << ActiveBits) - 1;
1784
1785 if (UnusedComponentsAtFront > 0) {
1786 static const unsigned InvalidOffsetIdx = 0xf;
1787
1788 unsigned OffsetIdx;
1789 switch (II.getIntrinsicID()) {
1790 case Intrinsic::amdgcn_raw_buffer_load:
1791 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1792 OffsetIdx = 1;
1793 break;
1794 case Intrinsic::amdgcn_s_buffer_load:
1795
1796
1797
1798 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1799 OffsetIdx = InvalidOffsetIdx;
1800 else
1801 OffsetIdx = 1;
1802 break;
1803 case Intrinsic::amdgcn_struct_buffer_load:
1804 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1805 OffsetIdx = 2;
1806 break;
1807 default:
1808
1809 OffsetIdx = InvalidOffsetIdx;
1810 break;
1811 }
1812
1813 if (OffsetIdx != InvalidOffsetIdx) {
1814
1815 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1816 auto *Offset = Args[OffsetIdx];
1817 unsigned SingleComponentSizeInBits =
1819 unsigned OffsetAdd =
1820 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1821 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1823 }
1824 }
1825 } else {
1826
1827
1829 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1830
1831
1832 if (DMaskVal == 0)
1833 return nullptr;
1834
1835
1836 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1837
1838 unsigned NewDMaskVal = 0;
1839 unsigned OrigLdStIdx = 0;
1840 for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1841 const unsigned Bit = 1 << SrcIdx;
1842 if (!!(DMaskVal & Bit)) {
1843 if (!!DemandedElts[OrigLdStIdx])
1844 NewDMaskVal |= Bit;
1845 OrigLdStIdx++;
1846 }
1847 }
1848
1849 if (DMaskVal != NewDMaskVal)
1850 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1851 }
1852
1853 unsigned NewNumElts = DemandedElts.popcount();
1854 if (!NewNumElts)
1856
1857 if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1858 if (DMaskIdx >= 0)
1859 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1860 return nullptr;
1861 }
1862
1863
1864
1867 return nullptr;
1868
1869 Type *NewTy =
1871 OverloadTys[0] = NewTy;
1872
1873 if (!IsLoad) {
1875 for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1876 if (DemandedElts[OrigStoreIdx])
1877 EltMask.push_back(OrigStoreIdx);
1878
1879 if (NewNumElts == 1)
1881 else
1883 }
1884
1889
1890 if (IsLoad) {
1891 if (NewNumElts == 1) {
1894 }
1895
1897 unsigned NewLoadIdx = 0;
1898 for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1899 if (!!DemandedElts[OrigLoadIdx])
1900 EltMask.push_back(NewLoadIdx++);
1901 else
1903 }
1904
1906
1907 return Shuffle;
1908 }
1909
1910 return NewCall;
1911}
1912
1915 APInt &UndefElts) const {
1917 if (!VT)
1918 return nullptr;
1919
1920 const unsigned FirstElt = DemandedElts.countr_zero();
1921 const unsigned LastElt = DemandedElts.getActiveBits() - 1;
1922 const unsigned MaskLen = LastElt - FirstElt + 1;
1923
1924 unsigned OldNumElts = VT->getNumElements();
1925 if (MaskLen == OldNumElts && MaskLen != 1)
1926 return nullptr;
1927
1928 Type *EltTy = VT->getElementType();
1930
1931
1932
1934 return nullptr;
1935
1936 Value *Src = II.getArgOperand(0);
1937
1938
1939
1941 II.getOperandBundlesAsDefs(OpBundles);
1942
1946
1947 if (MaskLen == 1) {
1949
1950
1952
1954 NewCall, FirstElt);
1955 }
1956
1958 for (unsigned I = 0; I != MaskLen; ++I) {
1959 if (DemandedElts[FirstElt + I])
1960 ExtractMask[I] = FirstElt + I;
1961 }
1962
1964
1965
1967
1969 for (unsigned I = 0; I != MaskLen; ++I) {
1970 if (DemandedElts[FirstElt + I])
1971 InsertMask[FirstElt + I] = I;
1972 }
1973
1974
1975
1977}
1978
1981 APInt &UndefElts2, APInt &UndefElts3,
1983 SimplifyAndSetOp) const {
1984 switch (II.getIntrinsicID()) {
1985 case Intrinsic::amdgcn_readfirstlane:
1986 SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1988 case Intrinsic::amdgcn_raw_buffer_load:
1989 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1990 case Intrinsic::amdgcn_raw_buffer_load_format:
1991 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1992 case Intrinsic::amdgcn_raw_tbuffer_load:
1993 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1994 case Intrinsic::amdgcn_s_buffer_load:
1995 case Intrinsic::amdgcn_struct_buffer_load:
1996 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1997 case Intrinsic::amdgcn_struct_buffer_load_format:
1998 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1999 case Intrinsic::amdgcn_struct_tbuffer_load:
2000 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2002 default: {
2003 if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
2005 }
2006 break;
2007 }
2008 }
2009 return std::nullopt;
2010}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
Definition AMDGPUInstCombineIntrinsic.cpp:492
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
Definition AMDGPUInstCombineIntrinsic.cpp:499
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
Definition AMDGPUInstCombineIntrinsic.cpp:546
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition AMDGPUInstCombineIntrinsic.cpp:105
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
Definition AMDGPUInstCombineIntrinsic.cpp:432
static APInt defaultComponentBroadcast(Value *V)
Definition AMDGPUInstCombineIntrinsic.cpp:458
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
Definition AMDGPUInstCombineIntrinsic.cpp:120
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition AMDGPUInstCombineIntrinsic.cpp:46
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition AMDGPUInstCombineIntrinsic.cpp:1756
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition AMDGPUInstCombineIntrinsic.cpp:154
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition AMDGPUInstCombineIntrinsic.cpp:67
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Definition AMDGPUInstCombineIntrinsic.cpp:414
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
This file a TargetTransformInfoImplBase conforming object specific to the AMDGPU target machine.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition AMDGPUInstCombineIntrinsic.cpp:518
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AMDGPUInstCombineIntrinsic.cpp:639
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
Definition AMDGPUInstCombineIntrinsic.cpp:557
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AMDGPUInstCombineIntrinsic.cpp:1979
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
Definition AMDGPUInstCombineIntrinsic.cpp:1913
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition AMDGPUInstCombineIntrinsic.cpp:391
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.