//===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// X86 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#include "X86TargetTransformInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "x86tti"
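
// The helper below materializes an all-true/all-false bool vector by
// constant-folding an "is negative" (sign bit set) comparison of each element
// of a constant vector. The x86 blend and masked load/store intrinsics encode
// their mask in the element sign bits, so this recovers the equivalent
// generic <N x i1> mask.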
static Constant *getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL) {
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  V = ConstantExpr::getBitCast(V, IntTy);
  V = ConstantFoldCompareInstOperands(CmpInst::ICMP_SGT,
                                      Constant::getNullValue(IntTy), V, DL);
  assert(V && "Vector must be foldable");
  return V;
}
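
// Peek through an x86 vector mask and recover the underlying i1 vector:
// either fold a constant mask via the sign-bit test above, or strip a
// sign-extension from a genuine bool vector.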
static Value *getBoolVecFromMask(Value *Mask, const DataLayout &DL) {
  // Fold a constant mask.
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
    return getNegativeIsTrueBoolVec(ConstantMask, DL);

  // The mask was extended from a boolean vector.
  Value *ExtMask;
  if (match(Mask, m_SExt(m_Value(ExtMask))) &&
      ExtMask->getType()->isIntOrIntVectorTy(1))
    return ExtMask;

  return nullptr;
}
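
// Convert an x86 masked load (AVX maskload family) whose mask is constant, or
// is a sign-extended bool vector, into the generic llvm.masked.load intrinsic
// so target-independent optimizations can see it. A zero mask folds directly
// to a zero vector.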
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Constant *ZeroVec = Constant::getNullValue(II.getType());

  // Zero Mask - masked load instruction creates a zero vector.
  if (isa<ConstantAggregateZero>(Mask))
    return IC.replaceInstUsesWith(II, ZeroVec);

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
    // The pass-through vector for an x86 masked load is a zero vector.
    CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
        II.getType(), Ptr, Align(1), BoolMask, ZeroVec);
    return IC.replaceInstUsesWith(II, NewMaskedLoad);
  }

  return nullptr;
}
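
// Same idea for x86 masked stores: a zero mask means the store is dead, and a
// recoverable bool-vector mask lets us emit llvm.masked.store instead. The
// SSE2 maskmovdqu form is deliberately left alone (it has unaligned,
// non-temporal semantics).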
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

  // Zero Mask - this masked store instruction does nothing.
  if (isa<ConstantAggregateZero>(Mask)) {
    IC.eraseInstFromFunction(II);
    return true;
  }

  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
  // anything else at this level.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
    IC.Builder.CreateMaskedStore(Vec, Ptr, Align(1), BoolMask);

    // 'Replace uses' doesn't work for stores. Erase the original masked store.
    IC.eraseInstFromFunction(II);
    return true;
  }

  return false;
}
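
// Fold the SSE2/AVX2/AVX-512 shift-by-immediate and shift-by-scalar
// intrinsics (PSLL/PSRL/PSRA and their *i immediate forms) to generic IR
// shifts when the amount is known to be in range. Out-of-range amounts have
// defined x86 behaviour: logical shifts produce zero and arithmetic shifts
// clamp to (BitWidth - 1), which is what the body below materializes.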
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If it's guaranteed to be out of range, logical shifts
  // combine to zero and arithmetic shifts are clamped to (BitWidth - 1).
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits = llvm::computeKnownBits(Amt, II.getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if the shift amount is a constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 use all of the first 64 bits of the 128-bit vector operand to
  // compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If this is a shift by zero, just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
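
// Fold the AVX2/AVX-512 per-element (variable) shift intrinsics
// (PSLLV/PSRLV/PSRAV) to generic IR shifts. Out-of-range lanes are defined on
// x86 (zero for logical shifts, sign splat for arithmetic shifts), so
// constant shift vectors are rewritten lane by lane with those semantics.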
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
  KnownBits KnownAmt = llvm::computeKnownBits(Amt, II.getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount. We also collect special cases:
  // UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements are out of range or UNDEF, return a vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
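
// Expand PACKSS/PACKUS on constant operands to clamp + per-lane shuffle +
// truncate. Signed and unsigned packs both clamp with signed comparisons;
// they differ only in the min/max saturation constants.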
static Value *simplifyX86pack(IntrinsicInst &II,
                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();

  // Fast all undef handling.
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
    return UndefValue::get(ResTy);

  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  // Constant folding only.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Clamp values - signed/unsigned both use signed clamp values, but they
  // differ on the min/max values.
  APInt MinValue, MaxValue;
  if (IsSigned) {
    // PACKSS: Truncate signed value with signed saturation.
    MinValue =
        APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
    MaxValue =
        APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
  } else {
    // PACKUS: Truncate signed value with unsigned saturation.
    MinValue = APInt::getZero(SrcScalarSizeInBits);
    MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
  }

  auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
  auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

  // Shuffle the clamped args together at the lane level.
  SmallVector<int, 32> PackMask;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
  }
  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

  // Truncate to dst size.
  return Builder.CreateTrunc(Shuffle, ResTy);
}
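
// Expand PMULH/PMULHU/PMULHRS on constant operands: widen to 32-bit lanes,
// multiply, then take the high half (with the extra round-and-shift step for
// the rounding PMULHRSW variant).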
static Value *simplifyX86pmulh(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder, bool IsSigned,
                               bool IsRounding) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
         "Unexpected PMULH types");
  assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");

  // Multiply by undef -> zero (NOT undef!) as the other arg could still be zero.
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by zero.
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by one.
  if (!IsRounding) {
    if (match(Arg0, m_One()))
      return IsSigned ? Builder.CreateAShr(Arg1, 15)
                      : ConstantAggregateZero::get(ResTy);
    if (match(Arg1, m_One()))
      return IsSigned ? Builder.CreateAShr(Arg0, 15)
                      : ConstantAggregateZero::get(ResTy);
  }

  // Constant folding only.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Extend to twice the width and multiply.
  auto Cast =
      IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  auto *ExtTy = VectorType::getExtendedElementVectorType(ResTy);
  Value *LHS = Builder.CreateCast(Cast, Arg0, ExtTy);
  Value *RHS = Builder.CreateCast(Cast, Arg1, ExtTy);
  Value *Mul = Builder.CreateMul(LHS, RHS);

  if (IsRounding) {
    // PMULHRSW: truncate to the top 18 bits, add one, then extract
    // bits[16:1].
    auto *RndEltTy = IntegerType::get(II.getContext(), 18);
    auto *RndTy = FixedVectorType::get(RndEltTy, ResTy->getNumElements());
    Mul = Builder.CreateLShr(Mul, 14);
    Mul = Builder.CreateTrunc(Mul, RndTy);
    Mul = Builder.CreateAdd(Mul, ConstantInt::get(RndTy, 1));
    Mul = Builder.CreateLShr(Mul, 1);
  } else {
    // PMULH/PMULHU: extract the vXi16 most significant bits.
    Mul = Builder.CreateLShr(Mul, 16);
  }

  return Builder.CreateTrunc(Mul, ResTy);
}
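
// Expand PMADDWD/PMADDUBSW on constant operands by splitting even/odd
// element pairs, extending, multiplying, and summing (saturating for
// PMADDUBSW).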
static Value *simplifyX86pmadd(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsPMADDWD) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  [[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

  unsigned NumDstElts = ResTy->getNumElements();
  assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
         ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
         "Unexpected PMADD types");

  // Multiply by undef -> zero (NOT undef!) as the other arg could still be zero.
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by zero.
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Constant folding only.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Split the lo/hi element pairs, extend and add together:
  // PMADDWD(X,Y) =
  //   add(mul(sext(lhs[0]),sext(rhs[0])), mul(sext(lhs[1]),sext(rhs[1])))
  // PMADDUBSW(X,Y) =
  //   sadd_sat(mul(zext(lhs[0]),sext(rhs[0])), mul(zext(lhs[1]),sext(rhs[1])))
  SmallVector<int> LoMask, HiMask;
  for (unsigned I = 0; I != NumDstElts; ++I) {
    LoMask.push_back(2 * I + 0);
    HiMask.push_back(2 * I + 1);
  }

  auto *LHSLo = Builder.CreateShuffleVector(Arg0, LoMask);
  auto *LHSHi = Builder.CreateShuffleVector(Arg0, HiMask);
  auto *RHSLo = Builder.CreateShuffleVector(Arg1, LoMask);
  auto *RHSHi = Builder.CreateShuffleVector(Arg1, HiMask);

  auto LHSCast =
      IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
  LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
  RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
  RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
  Value *Lo = Builder.CreateMul(LHSLo, RHSLo);
  Value *Hi = Builder.CreateMul(LHSHi, RHSHi);
  return IsPMADDWD
             ? Builder.CreateAdd(Lo, Hi)
             : Builder.CreateIntrinsic(ResTy, Intrinsic::sadd_sat, {Lo, Hi});
}
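
// Expand MOVMSK to icmp-slt + bitcast-to-iN + zext, exposing the sign-bit
// extraction to generic optimizations. The MMX variant is left untouched.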
static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();

  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
  if (isa<UndefValue>(Arg))
    return Constant::getNullValue(ResTy);

  // Preserve previous behavior and give up.
  // TODO: treat as <8 x i8>.
  if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)
    return nullptr;

  auto *ArgTy = cast<FixedVectorType>(Arg->getType());

  // Expand MOVMSK to compare/bitcast/zext:
  // e.g. PMOVMSKB(v16i8 x):
  // %cmp = icmp slt <16 x i8> %x, zeroinitializer
  // %int = bitcast <16 x i1> %cmp to i16
  // %res = zext i16 %int to i32
  unsigned NumElts = ArgTy->getNumElements();
  Type *IntegerTy = Builder.getIntNTy(NumElts);

  Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
  Res = Builder.CreateIsNeg(Res);
  Res = Builder.CreateBitCast(Res, IntegerTy);
  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
  return Res;
}
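
// An addcarry with a known-zero carry-in is just uadd.with.overflow with the
// result pair re-packed into the x86 {i8, iN} struct layout.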
static Value *simplifyX86addcar(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *CarryIn = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *Op2 = II.getArgOperand(2);
  Type *RetTy = II.getType();
  Type *OpTy = Op1->getType();
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");

  // If carry-in is zero, this is just an unsigned add with overflow.
  if (match(CarryIn, m_ZeroInt())) {
    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
                                          {Op1, Op2});
    // The types have to be adjusted to match the x86 call types.
    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
                                       Builder.getInt8Ty());
    Value *Res = PoisonValue::get(RetTy);
    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
    return Builder.CreateInsertValue(Res, UAddResult, 1);
  }

  return nullptr;
}
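
// Lower VPTERNLOG immediates to explicit logic ops. Each lambda tracks the
// truth table of the value it builds in the second pair element, so the final
// assert can verify that the expansion matches the immediate. Only cases that
// fold to a few instructions, or whose inputs are all constants, are expanded.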
static Value *simplifyTernarylogic(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
  auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
  if (!ArgImm || ArgImm->getValue().uge(256))
    return nullptr;

  Value *ArgA = II.getArgOperand(0);
  Value *ArgB = II.getArgOperand(1);
  Value *ArgC = II.getArgOperand(2);

  // Each lambda builds the IR for one logic op and, in the second pair
  // element, the truth table of the value it produced.
  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  };
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  };
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  };
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  };
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

  bool AIsConst = match(ArgA, m_ImmConstant());
  bool BIsConst = match(ArgB, m_ImmConstant());
  bool CIsConst = match(ArgC, m_ImmConstant());

  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;

  // Start with the truth tables of the three arguments themselves. It is hard
  // to go from an immediate to logic ops, but easy to verify that a set of
  // logic ops matches an immediate: at the end, Res.second must equal Imm.
  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};

  uint8_t Imm = ArgImm->getValue().getZExtValue();
  switch (Imm) {
  case 0x0:
    Res = {Constant::getNullValue(II.getType()), 0};
    break;
  case 0x1:
    if (ABCIsConst)
    break;
  case 0x2:
    if (ABCIsConst)
    break;
  case 0x3:
    if (ABIsConst)
    break;
  case 0x4:
    if (ABCIsConst)
    break;
  case 0x5:
    if (ACIsConst)
    break;
  case 0x6:
    if (ABCIsConst)
    break;
  case 0x7:
    if (ABCIsConst)
    break;
  case 0x8:
    if (ABCIsConst)
    break;
  case 0x9:
    if (ABCIsConst)
    break;
  case 0xa:
    if (ACIsConst)
    break;
  case 0xb:
    if (ABCIsConst)
      Res = Nor(A, Nor(C, Not(B)));
    break;
  case 0xc:
    if (ABIsConst)
    break;
  case 0xd:
    if (ABCIsConst)
      Res = Nor(A, Nor(B, Not(C)));
    break;
  case 0xe:
    if (ABCIsConst)
    break;
  case 0xf:
    Res = Not(A);
    break;
  case 0x10:
    if (ABCIsConst)
    break;
  case 0x11:
    if (BCIsConst)
    break;
  case 0x12:
    if (ABCIsConst)
    break;
  case 0x13:
    if (ABCIsConst)
    break;
  case 0x14:
    if (ABCIsConst)
    break;
  case 0x15:
    if (ABCIsConst)
    break;
  case 0x16:
    if (ABCIsConst)
    break;
  case 0x17:
    if (ABCIsConst)
    break;
  case 0x18:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xnor(A, C));
    break;
  case 0x19:
    if (ABCIsConst)
      Res = And(Nand(A, B), Xnor(B, C));
    break;
  case 0x1a:
    if (ABCIsConst)
    break;
  case 0x1b:
    if (ABCIsConst)
    break;
  case 0x1c:
    if (ABCIsConst)
    break;
  case 0x1d:
    if (ABCIsConst)
    break;
  case 0x1e:
    if (ABCIsConst)
    break;
  case 0x1f:
    if (ABCIsConst)
    break;
  case 0x20:
    if (ABCIsConst)
    break;
  case 0x21:
    if (ABCIsConst)
    break;
  case 0x22:
    if (BCIsConst)
    break;
  case 0x23:
    if (ABCIsConst)
      Res = Nor(B, Nor(C, Not(A)));
    break;
  case 0x24:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xor(A, C));
    break;
  case 0x25:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, B), C));
    break;
  case 0x26:
    if (ABCIsConst)
    break;
  case 0x27:
    if (ABCIsConst)
    break;
  case 0x28:
    if (ABCIsConst)
    break;
  case 0x29:
    if (ABCIsConst)
    break;
  case 0x2a:
    if (ABCIsConst)
    break;
  case 0x2b:
    if (ABCIsConst)
    break;
  case 0x2c:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(B, C));
    break;
  case 0x2d:
    if (ABCIsConst)
    break;
  case 0x2e:
    if (ABCIsConst)
    break;
  case 0x2f:
    if (ABCIsConst)
      Res = Nand(A, Or(B, Not(C)));
    break;
  case 0x30:
    if (ABIsConst)
    break;
  case 0x31:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(C)), B);
    break;
  case 0x32:
    if (ABCIsConst)
    break;
  case 0x33:
    Res = Not(B);
    break;
  case 0x34:
    if (ABCIsConst)
    break;
  case 0x35:
    if (ABCIsConst)
    break;
  case 0x36:
    if (ABCIsConst)
    break;
  case 0x37:
    if (ABCIsConst)
    break;
  case 0x38:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(A, C));
    break;
  case 0x39:
    if (ABCIsConst)
    break;
  case 0x3a:
    if (ABCIsConst)
    break;
  case 0x3b:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(C)), B);
    break;
  case 0x3c:
    Res = Xor(A, B);
    break;
  case 0x3d:
    if (ABCIsConst)
    break;
  case 0x3e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(C)), B));
    break;
  case 0x3f:
    if (ABIsConst)
    break;
  case 0x40:
    if (ABCIsConst)
    break;
  case 0x41:
    if (ABCIsConst)
    break;
  case 0x42:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Xnor(A, C));
    break;
  case 0x43:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, C), B));
    break;
  case 0x44:
    if (BCIsConst)
    break;
  case 0x45:
    if (ABCIsConst)
      Res = Nor(Nor(B, Not(A)), C);
    break;
  case 0x46:
    if (ABCIsConst)
    break;
  case 0x47:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, C), B), C);
    break;
  case 0x48:
    if (ABCIsConst)
    break;
  case 0x49:
    if (ABCIsConst)
    break;
  case 0x4a:
    if (ABCIsConst)
      Res = Nor(Xnor(A, C), Nor(B, C));
    break;
  case 0x4b:
    if (ABCIsConst)
      Res = Xor(A, Or(C, Not(B)));
    break;
  case 0x4c:
    if (ABCIsConst)
      Res = And(Nand(A, C), B);
    break;
  case 0x4d:
    if (ABCIsConst)
    break;
  case 0x4e:
    if (ABCIsConst)
    break;
  case 0x4f:
    if (ABCIsConst)
      Res = Nand(A, Nand(B, Not(C)));
    break;
  case 0x50:
    if (ACIsConst)
    break;
  case 0x51:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(B)), C);
    break;
  case 0x52:
    if (ABCIsConst)
    break;
  case 0x53:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(B, C), A), C);
    break;
  case 0x54:
    if (ABCIsConst)
    break;
  case 0x55:
    Res = Not(C);
    break;
  case 0x56:
    if (ABCIsConst)
    break;
  case 0x57:
    if (ABCIsConst)
    break;
  case 0x58:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(A, C));
    break;
  case 0x59:
    if (ABCIsConst)
      Res = Xor(Or(A, Not(B)), C);
    break;
  case 0x5a:
    Res = Xor(A, C);
    break;
  case 0x5b:
    if (ABCIsConst)
    break;
  case 0x5c:
    if (ABCIsConst)
    break;
  case 0x5d:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(B)), C);
    break;
  case 0x5e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(B)), C));
    break;
  case 0x5f:
    if (ACIsConst)
    break;
  case 0x60:
    if (ABCIsConst)
    break;
  case 0x61:
    if (ABCIsConst)
    break;
  case 0x62:
    if (ABCIsConst)
      Res = Nor(Nor(A, C), Xnor(B, C));
    break;
  case 0x63:
    if (ABCIsConst)
      Res = Xor(B, Or(C, Not(A)));
    break;
  case 0x64:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(B, C));
    break;
  case 0x65:
    if (ABCIsConst)
      Res = Xor(Or(B, Not(A)), C);
    break;
  case 0x66:
    Res = Xor(B, C);
    break;
  case 0x67:
    if (ABCIsConst)
    break;
  case 0x68:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
    break;
  case 0x69:
    if (ABCIsConst)
      Res = Xor(Xnor(A, B), C);
    break;
  case 0x6a:
    if (ABCIsConst)
    break;
  case 0x6b:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
    break;
  case 0x6c:
    if (ABCIsConst)
    break;
  case 0x6d:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
    break;
  case 0x6e:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(B)), Xor(B, C));
    break;
  case 0x6f:
    if (ABCIsConst)
      Res = Nand(A, Xnor(B, C));
    break;
  case 0x70:
    if (ABCIsConst)
      Res = And(A, Nand(B, C));
    break;
  case 0x71:
    if (ABCIsConst)
    break;
  case 0x72:
    if (ABCIsConst)
    break;
  case 0x73:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(C)), B);
    break;
  case 0x74:
    if (ABCIsConst)
    break;
  case 0x75:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(B)), C);
    break;
  case 0x76:
    if (ABCIsConst)
      Res = Xor(B, Or(Nor(B, Not(A)), C));
    break;
  case 0x77:
    if (BCIsConst)
    break;
  case 0x78:
    if (ABCIsConst)
    break;
  case 0x79:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
    break;
  case 0x7a:
    if (ABCIsConst)
      Res = Or(Xor(A, C), Nor(B, Not(A)));
    break;
  case 0x7b:
    if (ABCIsConst)
      Res = Nand(Xnor(A, C), B);
    break;
  case 0x7c:
    if (ABCIsConst)
      Res = Or(Xor(A, B), Nor(C, Not(A)));
    break;
  case 0x7d:
    if (ABCIsConst)
      Res = Nand(Xnor(A, B), C);
    break;
  case 0x7e:
    if (ABCIsConst)
    break;
  case 0x7f:
    if (ABCIsConst)
      Res = Nand(And(A, B), C);
    break;
  case 0x80:
    if (ABCIsConst)
    break;
  case 0x81:
    if (ABCIsConst)
    break;
  case 0x82:
    if (ABCIsConst)
      Res = And(Xnor(A, B), C);
    break;
  case 0x83:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Nor(C, Not(A)));
    break;
  case 0x84:
    if (ABCIsConst)
      Res = And(Xnor(A, C), B);
    break;
  case 0x85:
    if (ABCIsConst)
      Res = Nor(Xor(A, C), Nor(B, Not(A)));
    break;
  case 0x86:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
    break;
  case 0x87:
    if (ABCIsConst)
      Res = Xor(A, Nand(B, C));
    break;
  case 0x88:
    Res = And(B, C);
    break;
  case 0x89:
    if (ABCIsConst)
      Res = Xor(B, Nor(Nor(B, Not(A)), C));
    break;
  case 0x8a:
    if (ABCIsConst)
      Res = And(Nand(A, Not(B)), C);
    break;
  case 0x8b:
    if (ABCIsConst)
    break;
  case 0x8c:
    if (ABCIsConst)
      Res = And(Nand(A, Not(C)), B);
    break;
  case 0x8d:
    if (ABCIsConst)
    break;
  case 0x8e:
    if (ABCIsConst)
    break;
  case 0x8f:
    if (ABCIsConst)
      Res = Nand(A, Nand(B, C));
    break;
  case 0x90:
    if (ABCIsConst)
      Res = And(A, Xnor(B, C));
    break;
  case 0x91:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(B)), Xor(B, C));
    break;
  case 0x92:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
    break;
  case 0x93:
    if (ABCIsConst)
      Res = Xor(Nand(A, C), B);
    break;
  case 0x94:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
    break;
  case 0x95:
    if (ABCIsConst)
      Res = Xor(Nand(A, B), C);
    break;
  case 0x96:
    if (ABCIsConst)
    break;
  case 0x97:
    if (ABCIsConst)
    break;
  case 0x98:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xor(B, C));
    break;
  case 0x99:
    if (BCIsConst)
    break;
  case 0x9a:
    if (ABCIsConst)
      Res = Xor(Nor(B, Not(A)), C);
    break;
  case 0x9b:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xnor(B, C));
    break;
  case 0x9c:
    if (ABCIsConst)
      Res = Xor(B, Nor(C, Not(A)));
    break;
  case 0x9d:
    if (ABCIsConst)
      Res = Or(Nor(A, C), Xnor(B, C));
    break;
  case 0x9e:
    if (ABCIsConst)
    break;
  case 0x9f:
    if (ABCIsConst)
      Res = Nand(A, Xor(B, C));
    break;
  case 0xa0:
    Res = And(A, C);
    break;
  case 0xa1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, Not(B)), C));
    break;
  case 0xa2:
    if (ABCIsConst)
      Res = And(Or(A, Not(B)), C);
    break;
  case 0xa3:
    if (ABCIsConst)
    break;
  case 0xa4:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, B), C));
    break;
  case 0xa5:
    if (ACIsConst)
    break;
  case 0xa6:
    if (ABCIsConst)
      Res = Xor(Nor(A, Not(B)), C);
    break;
  case 0xa7:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xnor(A, C));
    break;
  case 0xa8:
    if (ABCIsConst)
    break;
  case 0xa9:
    if (ABCIsConst)
    break;
  case 0xaa:
    Res = C;
    break;
  case 0xab:
    if (ABCIsConst)
    break;
  case 0xac:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(B, C), A), C);
    break;
  case 0xad:
    if (ABCIsConst)
      Res = Or(Xnor(A, C), And(B, C));
    break;
  case 0xae:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(B)), C);
    break;
  case 0xaf:
    if (ACIsConst)
    break;
  case 0xb0:
    if (ABCIsConst)
      Res = And(A, Nand(B, Not(C)));
    break;
  case 0xb1:
    if (ABCIsConst)
    break;
  case 0xb2:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
    break;
  case 0xb3:
    if (ABCIsConst)
      Res = Nand(Nand(A, C), B);
    break;
  case 0xb4:
    if (ABCIsConst)
      Res = Xor(A, Nor(C, Not(B)));
    break;
  case 0xb5:
    if (ABCIsConst)
      Res = Or(Xnor(A, C), Nor(B, C));
    break;
  case 0xb6:
    if (ABCIsConst)
    break;
  case 0xb7:
    if (ABCIsConst)
      Res = Nand(Xor(A, C), B);
    break;
  case 0xb8:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, C), B), C);
    break;
  case 0xb9:
    if (ABCIsConst)
    break;
  case 0xba:
    if (ABCIsConst)
      Res = Or(Nor(B, Not(A)), C);
    break;
  case 0xbb:
    if (BCIsConst)
    break;
  case 0xbc:
    if (ABCIsConst)
    break;
  case 0xbd:
    if (ABCIsConst)
      Res = Or(Xor(A, B), Xnor(A, C));
    break;
  case 0xbe:
    if (ABCIsConst)
    break;
  case 0xbf:
    if (ABCIsConst)
    break;
  case 0xc0:
    Res = And(A, B);
    break;
  case 0xc1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, Not(C)), B));
    break;
  case 0xc2:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, C), B));
    break;
  case 0xc3:
    if (ABIsConst)
    break;
  case 0xc4:
    if (ABCIsConst)
      Res = And(Or(A, Not(C)), B);
    break;
  case 0xc5:
    if (ABCIsConst)
    break;
  case 0xc6:
    if (ABCIsConst)
      Res = Xor(Nor(A, Not(C)), B);
    break;
  case 0xc7:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Nor(A, C));
    break;
  case 0xc8:
    if (ABCIsConst)
    break;
  case 0xc9:
    if (ABCIsConst)
    break;
  case 0xca:
    if (ABCIsConst)
      Res = Xor(B, Nor(A, Xnor(B, C)));
    break;
  case 0xcb:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), And(B, C));
    break;
  case 0xcc:
    Res = B;
    break;
  case 0xcd:
    if (ABCIsConst)
    break;
  case 0xce:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(C)), B);
    break;
  case 0xcf:
    if (ABIsConst)
    break;
  case 0xd0:
    if (ABCIsConst)
      Res = And(A, Or(B, Not(C)));
    break;
  case 0xd1:
    if (ABCIsConst)
    break;
  case 0xd2:
    if (ABCIsConst)
      Res = Xor(A, Nor(B, Not(C)));
    break;
  case 0xd3:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Nor(B, C));
    break;
  case 0xd4:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
    break;
  case 0xd5:
    if (ABCIsConst)
      Res = Nand(Nand(A, B), C);
    break;
  case 0xd6:
    if (ABCIsConst)
    break;
  case 0xd7:
    if (ABCIsConst)
      Res = Nand(Xor(A, B), C);
    break;
  case 0xd8:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), C), B);
    break;
  case 0xd9:
    if (ABCIsConst)
      Res = Or(And(A, B), Xnor(B, C));
    break;
  case 0xda:
    if (ABCIsConst)
    break;
  case 0xdb:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Xor(A, C));
    break;
  case 0xdc:
    if (ABCIsConst)
      Res = Or(B, Nor(C, Not(A)));
    break;
  case 0xdd:
    if (BCIsConst)
    break;
  case 0xde:
    if (ABCIsConst)
    break;
  case 0xdf:
    if (ABCIsConst)
    break;
  case 0xe0:
    if (ABCIsConst)
    break;
  case 0xe1:
    if (ABCIsConst)
    break;
  case 0xe2:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xnor(A, C), B));
    break;
  case 0xe3:
    if (ABCIsConst)
    break;
  case 0xe4:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xnor(A, B), C));
    break;
  case 0xe5:
    if (ABCIsConst)
    break;
  case 0xe6:
    if (ABCIsConst)
    break;
  case 0xe7:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Xnor(A, C));
    break;
  case 0xe8:
    if (ABCIsConst)
      Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
    break;
  case 0xe9:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
    break;
  case 0xea:
    if (ABCIsConst)
    break;
  case 0xeb:
    if (ABCIsConst)
    break;
  case 0xec:
    if (ABCIsConst)
    break;
  case 0xed:
    if (ABCIsConst)
    break;
  case 0xee:
    Res = Or(B, C);
    break;
  case 0xef:
    if (ABCIsConst)
      Res = Nand(A, Nor(B, C));
    break;
  case 0xf0:
    Res = A;
    break;
  case 0xf1:
    if (ABCIsConst)
    break;
  case 0xf2:
    if (ABCIsConst)
      Res = Or(A, Nor(B, Not(C)));
    break;
  case 0xf3:
    if (ABIsConst)
    break;
  case 0xf4:
    if (ABCIsConst)
      Res = Or(A, Nor(C, Not(B)));
    break;
  case 0xf5:
    if (ACIsConst)
    break;
  case 0xf6:
    if (ABCIsConst)
    break;
  case 0xf7:
    if (ABCIsConst)
    break;
  case 0xf8:
    if (ABCIsConst)
    break;
  case 0xf9:
    if (ABCIsConst)
    break;
  case 0xfa:
    Res = Or(A, C);
    break;
  case 0xfb:
    if (ABCIsConst)
      Res = Nand(Nor(A, C), B);
    break;
  case 0xfc:
    Res = Or(A, B);
    break;
  case 0xfd:
    if (ABCIsConst)
      Res = Nand(Nor(A, B), C);
    break;
  case 0xfe:
    if (ABCIsConst)
    break;
  case 0xff:
    Res = {Constant::getAllOnesValue(II.getType()), 0xff};
    break;
  }

  assert((Res.first == nullptr || Res.second == Imm) &&
         "Simplification of ternary logic does not verify!");
  return Res.first;
}
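
// Lower INSERTPS with a constant control byte to a shufflevector (or to a
// zero vector outright when the zero mask covers all four lanes).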
static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  // The immediate permute control byte looks like this:
  //    [3:0] - zero mask for each 32-bit lane
  //    [5:4] - select one 32-bit destination lane
  //    [7:6] - select one 32-bit source lane

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // If all zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (ZMask == 0xf)
    return ZeroVector;

  // Initialize by passing all of the first source bits through.
  int ShuffleMask[4] = {0, 1, 2, 3};

  // We may replace the second operand with the zero vector.
  Value *V1 = II.getArgOperand(1);

  if (ZMask) {
    // If the zero mask is being used with a single input or the zero mask
    // overrides the destination lane, this is a shuffle with the zero vector.
    if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
        (ZMask & (1 << DestLane))) {
      V1 = ZeroVector;
      // We may still move 32-bits of the first source vector from one lane
      // to another.
      ShuffleMask[DestLane] = SourceLane;
      // The zero mask may override the previous insert operation.
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4;
    } else {
      // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
      return nullptr;
    }
  } else {
    // Replace the selected destination lane with the selected source lane.
    ShuffleMask[DestLane] = SourceLane + 4;
  }

  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}
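
// Simplify SSE4A EXTRQ/EXTRQI: constant-fold the bit-field extraction, or
// turn byte-aligned extractions into a shuffle that lowering recognizes.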
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  };

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *CI0 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;

  // Attempt to constant fold.
  if (CILength && CIIndex) {
    // From AMD documentation: "The bit index and field length are each six
    // bits in length other bits of the field are ignored."
    APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
    APInt APLength = CILength->getValue().zextOrTrunc(6);

    unsigned Index = APIndex.getZExtValue();

    // From AMD documentation: "a value of zero in the field length is
    // defined as length of 64".
    unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

    // From AMD documentation: "If the sum of the bit index + length field
    // is greater than 64, the results are undefined".
    unsigned End = Index + Length;

    // Note that both field index and field length are 8-bit quantities.
    // Since variables 'Index' and 'Length' are unsigned values
    // obtained from zero-extending field index and field length
    // respectively, their sum should never wrap around.
    if (End > 64)
      return UndefValue::get(II.getType());

    // If we are inserting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      // Convert bit indices to byte indices.
      Length /= 8;
      Index /= 8;

      Type *IntTy8 = Type::getInt8Ty(II.getContext());
      auto *ShufTy = FixedVectorType::get(IntTy8, 16);

      SmallVector<int, 16> ShuffleMask;
      for (int i = 0; i != (int)Length; ++i)
        ShuffleMask.push_back(i + Index);
      for (int i = Length; i != 8; ++i)
        ShuffleMask.push_back(i + 16);
      for (int i = 8; i != 16; ++i)
        ShuffleMask.push_back(-1);

      Value *SV = Builder.CreateShuffleVector(
          Builder.CreateBitCast(Op0, ShufTy),
          ConstantAggregateZero::get(ShufTy), ShuffleMask);
      return Builder.CreateBitCast(SV, II.getType());
    }

    // Constant Fold - shift Index'th bit to lowest position and mask off
    // Length bits.
    if (CI0) {
      APInt Elt = CI0->getValue();
      Elt.lshrInPlace(Index);
      Elt = Elt.zextOrTrunc(Length);
      return LowConstantHighUndef(Elt.getZExtValue());
    }

    // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
    if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
      Value *Args[] = {Op0, CILength, CIIndex};
      return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, Args);
    }
  }

  // Constant Fold - extraction from zero is always {zero, undef}.
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);

  return nullptr;
}
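
// Simplify SSE4A INSERTQ/INSERTQI: byte-aligned insertions become shuffles,
// fully-constant inputs fold to a {value, undef} vector, and INSERTQ with
// constant length/index is canonicalized to INSERTQI.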
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    auto *ShufTy = FixedVectorType::get(IntTy8, 16);

    SmallVector<int, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(i);
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(i + 16);
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(i);
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(-1);

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ShuffleMask);
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
  auto *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  auto *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    V00 = V00 & ~Mask;
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_insertqi, Args);
  }

  return nullptr;
}
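
// PSHUFB with a constant selector becomes a shufflevector against a zero
// vector; bit 7 of each control byte selects zero, and the low nibble selects
// a byte within the same 128-bit lane.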
static Value *simplifyX86pshufb(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[64];

  // Each byte in the shuffle control mask forms an index to permute the
  // corresponding byte in the destination operand.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

    // If the most significant bit (bit[7]) of each byte of the shuffle
    // control mask is set, then zero is written in the result byte.
    // The zero vector is in the right-hand side of the resulting
    // shufflevector.

    // The value of each index for the high 128-bit lane is the least
    // significant 4 bits of the respective shuffle control byte.
    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
    Indexes[I] = Index;
  }

  auto V1 = II.getArgOperand(0);
  auto V2 = Constant::getNullValue(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
}
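
// VPERMILPS/VPERMILPD with a constant selector becomes a shufflevector; the
// PD forms use bit 1 of the control element, and the 256/512-bit forms index
// within each 128-bit lane.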
static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
                                    InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[16];

  // The intrinsics only read one or two bits, clear the rest.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index = Index.zextOrTrunc(32).getLoBits(2);

    // The PD variants use bit 1 to select a per-lane element index, so
    // shift down to convert to a generic shuffle mask index.
    if (IsPD)
      Index.lshrInPlace(1);

    // The _256/_512 variants are a bit trickier since the mask bits always
    // index into the corresponding 128-bit half. In order to convert to a
    // generic shuffle, we have to make that explicit.
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = Index.getZExtValue();
  }

  auto V1 = II.getArgOperand(0);
  return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));
}
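
// VPERMD/VPERMPS and friends (single-source full permutes) with a constant
// selector become a shufflevector with the index masked to the vector width.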
static Value *simplifyX86vpermv(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[64];

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= Size - 1;
    Indexes[I] = Index;
  }

  auto V1 = II.getArgOperand(0);
  return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));
}
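
// VPERMI2/VPERMT2 (two-source permutes) with a constant selector become a
// two-input shufflevector, masking the index to twice the vector width.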
static Value *simplifyX86vpermv3(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 2 || Size == 4 || Size == 8 || Size == 16 || Size == 32 ||
          Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[64];

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= (2 * Size) - 1;
    Indexes[I] = Index;
  }

  auto V1 = II.getArgOperand(0);
  auto V2 = II.getArgOperand(2);
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size));
}
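
// For non-constant VPERMV/VPERMV3 masks, only the low index bits of each mask
// element are demanded, so SimplifyDemandedBits can drop the rest.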
static bool simplifyX86VPERMMask(Instruction *II, bool IsBinary,
                                 InstCombiner &IC) {
  auto *VecTy = cast<FixedVectorType>(II->getType());
  unsigned EltSizeInBits = VecTy->getScalarSizeInBits();
  unsigned NumElts = VecTy->getNumElements();
  assert(isPowerOf2_32(NumElts) && isPowerOf2_32(EltSizeInBits) &&
         "Unexpected shuffle mask size");

  unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);
  APInt DemandedMask = APInt::getLowBitsSet(EltSizeInBits, IdxSizeInBits);

  KnownBits KnownMask(EltSizeInBits);
  return IC.SimplifyDemandedBits(II, /*OpNo=*/1, DemandedMask, KnownMask);
}
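
// Main InstCombine hook: dispatch on the intrinsic ID and either fold the
// call outright (replaceInstUsesWith), simplify one of its operands in place
// (replaceOperand / return &II), or fall through to std::nullopt when no
// target-specific simplification applies.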

std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Shift = C->getZExtValue();
      uint64_t Length = (Shift >> 8) & 0xff;
      Shift &= 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // If the length is 0 or the shift is out of range, replace with zero.
      if (Length == 0 || Shift >= BitWidth) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        if (Length > BitWidth)
          Length = BitWidth;
        Result &= maskTrailingOnes<uint64_t>(Length);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
      // TODO: should we turn this into 'and' if shift is 0, or 'shl' if we
      // are only masking bits that a shift already cleared?
    }
    break;

  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      if (Index >= BitWidth) {
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }
      if (Index == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
      // TODO: should we convert this to an AND if the RHS is constant?
    }
    break;
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // Any single contiguous sequence of 1s anywhere in the mask simply
        // describes a subset of the input bits shifted to the appropriate
        // position. Replace with the straightforward IR.
        Value *Input = II.getArgOperand(0);
        Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
        return IC.replaceInstUsesWith(II, Shifted);
      }

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToSet = 1;

        while (Mask) {
          // Isolate lowest set bit.
          uint64_t BitToTest = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;

          BitToSet <<= 1;
          // Clear lowest set bit.
          Mask &= Mask - 1;
        }

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // Any single contiguous sequence of 1s anywhere in the mask simply
        // describes a subset of the input bits shifted to the appropriate
        // position. Replace with the straightforward IR.
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
        Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
        return IC.replaceInstUsesWith(II, Masked);
      }

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToTest = 1;

        while (Mask) {
          // Isolate lowest set bit.
          uint64_t BitToSet = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;

          BitToTest <<= 1;
          // Clear lowest set bit.
          Mask &= Mask - 1;
        }

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }

  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }

  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }

        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the elements as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // Cast the mask to an i1 vector and then extract the lowest element.
          auto *MaskTy = FixedVectorType::get(
              IC.Builder.getInt1Ty(),
              cast<IntegerType>(Mask->getType())->getBitWidth());
          Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
          Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
          // Extract the lowest element from the passthru operand.
          Value *Passthru =
              IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
          V = IC.Builder.CreateSelect(Mask, V, Passthru);
        }

        // Insert the result back into the original argument 0.
        V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);

        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;

  // Constant fold ashr( <A x Bi>, Ci ).
  // Constant fold lshr( <A x Bi>, Ci ).
  // Constant fold shl( <A x Bi>, Ci ).
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // SSE2/AVX2 uses only the first 64 bits of the 128-bit vector shift
    // operand to compute the shift amount.
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }

  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, true, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, false, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, true, true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
        IC.replaceOperand(II, 0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
        IC.replaceOperand(II, 1, V);
        MadeChange = true;
      }

      // If either input element is undef, the result is zero.
      if (DemandedElts1.isSubsetOf(UndefElts1) ||
          DemandedElts2.isSubsetOf(UndefElts2)) {
        return IC.replaceInstUsesWith(II,
                                      ConstantAggregateZero::get(II.getType()));
      }

      if (MadeChange) {
        return &II;
      }
    }
    break;
  }

  case Intrinsic::x86_sse41_insertps:
    if (Value *V = simplifyX86insertps(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    auto *CILength =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
           : nullptr;
    auto *CIIndex =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
    // operand and the lowest 16-bits of the second.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
    // bits of the lower 64-bits. The upper 64-bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));

    // Attempt to simplify to a constant or shuffle vector.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    auto *CI11 =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
    if (CI11) {
      const APInt &V11 = CI11->getValue();
      APInt Len = V11.zextOrTrunc(6);
      APInt Idx = V11.lshr(8).zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
        return IC.replaceInstUsesWith(II, V);
      }
    }

    // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: Extract lowest Length bits from lower half of second source
    // and insert over first source starting at Index bit. The upper 64-bits
    // are undefined.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
        return IC.replaceInstUsesWith(II, V);
      }
    }

    // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
    // operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }

  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // fold (blend A, A, Mask) -> A
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);
    if (Op0 == Op1) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    // Zero Mask - select 1st argument.
    if (isa<ConstantAggregateZero>(Mask)) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
      Constant *NewSelector =
          getNegativeIsTrueBoolVec(ConstantMask, IC.getDataLayout());
      return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
    }

    Mask = InstCombiner::peekThroughBitcast(Mask);

    // Peek through a one-use shuffle - VectorCombine should have simplified
    // this for cases where we're splitting wider vectors to use blendv
    // intrinsics.
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle was irregular or contains out-of-range indices.
      int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      Mask = InstCombiner::peekThroughBitcast(MaskSrc);
    }

    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value.
    Value *BoolVec;
    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
        BoolVec->getType()->isVectorTy() &&
        BoolVec->getType()->getScalarSizeInBits() == 1) {
      auto *MaskTy = cast<FixedVectorType>(Mask->getType());
      auto *OpTy = cast<FixedVectorType>(II.getType());
      unsigned NumMaskElts = MaskTy->getNumElements();
      unsigned NumOperandElts = OpTy->getNumElements();

      // If we peeked through a shuffle, reapply the shuffle to the bool vector.
      if (MaskSrc) {
        unsigned NumMaskSrcElts =
            cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
        NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
        // Multiple mask bits map to the same operand element - bail out.
        if (NumMaskElts > NumOperandElts)
          break;
        SmallVector<int> ScaledMask;
        if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask))
          break;
        BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask);
        MaskTy = FixedVectorType::get(IC.Builder.getInt1Ty(), NumMaskElts);
      }
      assert(MaskTy->getPrimitiveSizeInBits() ==
                 OpTy->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");

      if (NumMaskElts == NumOperandElts) {
        return SelectInst::Create(BoolVec, Op1, Op0);
      }

      // If the mask has fewer elements than the operands, each mask bit maps
      // to multiple elements of the operands. Bitcast back and forth.
      if (NumMaskElts < NumOperandElts) {
        Value *CastOp0 = IC.Builder.CreateBitCast(Op0, MaskTy);
        Value *CastOp1 = IC.Builder.CreateBitCast(Op1, MaskTy);
        Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
        return new BitCastInst(Sel, II.getType());
      }
    }

    break;
  }

  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {
    if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // Only the zero flag (bit 7) and index (low nibble) bits of each mask
    // byte are demanded.
    KnownBits KnownMask(8);
    if (IC.SimplifyDemandedBits(&II, /*OpNo=*/1, APInt(8, 0b10001111),
                                KnownMask))
      return &II;
    break;
  }

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // Only the low 2 bits of each i32 mask element are demanded.
    KnownBits KnownMask(32);
    if (IC.SimplifyDemandedBits(&II, /*OpNo=*/1, APInt(32, 0b00011), KnownMask))
      return &II;
    break;
  }

  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // Only bit 1 of each i64 mask element is demanded.
    KnownBits KnownMask(64);
    if (IC.SimplifyDemandedBits(&II, /*OpNo=*/1, APInt(64, 0b00010), KnownMask))
      return &II;
    break;
  }

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    if (simplifyX86VPERMMask(&II, /*IsBinary=*/false, IC))
      return &II;
    break;

  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
    if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    if (simplifyX86VPERMMask(&II, /*IsBinary=*/true, IC))
      return &II;
    break;

  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
      return I;
    }
    break;

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(II, IC)) {
      return nullptr;
    }
    break;

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

3076 case Intrinsic::x86_avx512_pternlog_d_128:
3077 case Intrinsic::x86_avx512_pternlog_d_256:
3078 case Intrinsic::x86_avx512_pternlog_d_512:
3079 case Intrinsic::x86_avx512_pternlog_q_128:
3080 case Intrinsic::x86_avx512_pternlog_q_256:
3081 case Intrinsic::x86_avx512_pternlog_q_512:
3082 if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
3083 return IC.replaceInstUsesWith(II, V);
3084 }
3085 break;
3086 default:
3087 break;
3088 }
3089 return std::nullopt;
3090}
3091
3092 std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
3093 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
3094 bool &KnownBitsComputed) const {
3095 switch (II.getIntrinsicID()) {
3096 default:
3097 break;
3098 case Intrinsic::x86_mmx_pmovmskb:
3099 case Intrinsic::x86_sse_movmsk_ps:
3100 case Intrinsic::x86_sse2_movmsk_pd:
3101 case Intrinsic::x86_sse2_pmovmskb_128:
3102 case Intrinsic::x86_avx_movmsk_ps_256:
3103 case Intrinsic::x86_avx_movmsk_pd_256:
3104 case Intrinsic::x86_avx2_pmovmskb: {
3105 // MOVMSK copies the vector elements' sign bits to the low bits of the
3106 // scalar result and zeros the high bits.
3107 unsigned ArgWidth;
3108 if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
3109 ArgWidth = 8;
3110 } else {
3111 auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
3112 ArgWidth = ArgType->getNumElements();
3113 }
3114
3115 // If we don't need any of the low bits then return zero; we know the
3116 // DemandedMask is non-zero already.
3117 APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
3118 Type *VTy = II.getType();
3119 if (DemandedElts.isZero()) {
3120 return ConstantInt::getNullValue(VTy);
3121 }
3122
3123 // We know that the upper bits are set to zero.
3124 Known.Zero.setBitsFrom(ArgWidth);
3125 KnownBitsComputed = true;
3126 break;
3127 }
3128 }
3129 return std::nullopt;
3130}
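// Illustrative consequence of the known-bits computation above (assumed
// example): for
//   %m = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %v)
// bits 16-31 of %m are known zero, so a following "and i32 %m, 65535"
// is redundant and can be removed by the demanded-bits walk.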
3131
3132 std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
3133 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
3134 APInt &UndefElts2, APInt &UndefElts3,
3135 std::function<void(Instruction *, unsigned, APInt, APInt &)>
3136 simplifyAndSetOp) const {
3137 unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
3138 switch (II.getIntrinsicID()) {
3139 default:
3140 break;
3141 case Intrinsic::x86_xop_vfrcz_ss:
3142 case Intrinsic::x86_xop_vfrcz_sd:
3143 // The instructions for these intrinsics are speced to zero upper bits not
3144 // pass them through like other scalar intrinsics, so we shouldn't just
3145 // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
3146 // Instead we should return a zero vector.
3147 if (!DemandedElts[0]) {
3148 IC.addToWorklist(&II);
3149 return ConstantAggregateZero::get(II.getType());
3150 }
3151
3152 // Only the lower element is used.
3153 DemandedElts = 1;
3154 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3155
3156 // Only the lower element is undefined; the high elements are zero.
3157 UndefElts = UndefElts[0];
3158 break;
3159
3160 // Unary scalar-as-vector operations that work column-wise.
3161 case Intrinsic::x86_sse_rcp_ss:
3162 case Intrinsic::x86_sse_rsqrt_ss:
3163 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3164
3165 // If the lowest element of a scalar op isn't used then use Arg0.
3166 if (!DemandedElts[0]) {
3167 IC.addToWorklist(&II);
3168 return II.getArgOperand(0);
3169 }
3170
3171
3172 break;
3173
3174 // Binary scalar-as-vector operations that work column-wise. The high
3175 // elements come from operand 0. The low element is a function of both
3176 // operands.
3177 case Intrinsic::x86_sse_min_ss:
3178 case Intrinsic::x86_sse_max_ss:
3179 case Intrinsic::x86_sse_cmp_ss:
3180 case Intrinsic::x86_sse2_min_sd:
3181 case Intrinsic::x86_sse2_max_sd:
3182 case Intrinsic::x86_sse2_cmp_sd: {
3183 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3184
3185 // If the lowest element of a scalar op isn't used then use Arg0.
3186 if (!DemandedElts[0]) {
3187 IC.addToWorklist(&II);
3188 return II.getArgOperand(0);
3189 }
3189 }
3190
3191
3192 DemandedElts = 1;
3193 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3194
3195 // The lower element is undefined if both lower elements are undefined.
3196 // Consider things like undef & 0: the result is known zero, not undef.
3197 if (!UndefElts2[0])
3198 UndefElts.clearBit(0);
3199
3200 break;
3201 }
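// Illustrative consequence (assumed example): in
//   %r = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
// only element 0 of %b is ever read (the high result lanes come from %a),
// so an instruction feeding %b that only defines elements 1-3 can be
// simplified away by the demanded-elements walk.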
3202
3203 // Binary scalar-as-vector operations that work column-wise. The high
3204 // elements come from operand 0 and the low element comes from operand 1.
3205 case Intrinsic::x86_sse41_round_ss:
3206 case Intrinsic::x86_sse41_round_sd: {
3207 // Don't use the low element of operand 0.
3208 APInt DemandedElts2 = DemandedElts;
3209 DemandedElts2.clearBit(0);
3210 simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
3211
3212 // If the lowest element of a scalar op isn't used then use Arg0.
3213 if (!DemandedElts[0]) {
3214 IC.addToWorklist(&II);
3215 return II.getArgOperand(0);
3216 }
3217
3218 // Only the lower element is used for operand 1.
3219 DemandedElts = 1;
3220 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3221
3222 // Take the high undef elements from operand 0 and take the lower element
3223 // from operand 1.
3224 UndefElts.clearBit(0);
3225 UndefElts |= UndefElts2[0];
3226 break;
3227 }
3228
3229 // Three-input scalar-as-vector operations that work column-wise. The high
3230 // elements come from operand 0 and the low element is a function of all
3231 // three inputs.
3232 case Intrinsic::x86_avx512_mask_add_ss_round:
3233 case Intrinsic::x86_avx512_mask_div_ss_round:
3234 case Intrinsic::x86_avx512_mask_mul_ss_round:
3235 case Intrinsic::x86_avx512_mask_sub_ss_round:
3236 case Intrinsic::x86_avx512_mask_max_ss_round:
3237 case Intrinsic::x86_avx512_mask_min_ss_round:
3238 case Intrinsic::x86_avx512_mask_add_sd_round:
3239 case Intrinsic::x86_avx512_mask_div_sd_round:
3240 case Intrinsic::x86_avx512_mask_mul_sd_round:
3241 case Intrinsic::x86_avx512_mask_sub_sd_round:
3242 case Intrinsic::x86_avx512_mask_max_sd_round:
3243 case Intrinsic::x86_avx512_mask_min_sd_round:
3244 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3245
3246 // If the lowest element of a scalar op isn't used then use Arg0.
3247 if (!DemandedElts[0]) {
3248 IC.addToWorklist(&II);
3249 return II.getArgOperand(0);
3250 }
3251
3252 // Only the lower element is used for operands 1 and 2.
3253 DemandedElts = 1;
3254 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3255 simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
3256
3257 // The lower element is undefined if all three lower elements are undefined.
3258 // Consider things like undef & 0: the result is known zero, not undef.
3259 if (!UndefElts2[0] || !UndefElts3[0])
3260 UndefElts.clearBit(0);
3261 break;
3262
3263
3264 case Intrinsic::x86_sse3_addsub_pd:
3265 case Intrinsic::x86_sse3_addsub_ps:
3266 case Intrinsic::x86_avx_addsub_pd_256:
3267 case Intrinsic::x86_avx_addsub_ps_256: {
3268 // If none of the even or none of the odd lanes are demanded, turn this
3269 // into a plain fadd/fsub on all lanes.
3270 APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
3271 APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
3272 bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
3273 bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
3274 if (IsSubOnly || IsAddOnly) {
3275 assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
3276 IRBuilderBase::InsertPointGuard Guard(IC.Builder);
3277 IC.Builder.SetInsertPoint(&II);
3278 Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
3279 return IC.Builder.CreateBinOp(
3280 IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3281 }
3282
3283 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3284 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3285 UndefElts &= UndefElts2;
3286 break;
3287 }
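// Illustrative fold (assumed example): ADDSUBPS subtracts in even lanes and
// adds in odd lanes, so if only odd lanes are demanded, e.g.
//   %v = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a, <4 x float> %b)
//   %x = extractelement <4 x float> %v, i32 1
// the call can be rewritten as "%v = fadd <4 x float> %a, %b".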
3288
3289 // General per-element vector operations.
3290 case Intrinsic::x86_avx2_psllv_d:
3291 case Intrinsic::x86_avx2_psllv_d_256:
3292 case Intrinsic::x86_avx2_psllv_q:
3293 case Intrinsic::x86_avx2_psllv_q_256:
3294 case Intrinsic::x86_avx2_psrlv_d:
3295 case Intrinsic::x86_avx2_psrlv_d_256:
3296 case Intrinsic::x86_avx2_psrlv_q:
3297 case Intrinsic::x86_avx2_psrlv_q_256:
3298 case Intrinsic::x86_avx2_psrav_d:
3299 case Intrinsic::x86_avx2_psrav_d_256: {
3300 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3301 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3302 UndefElts &= UndefElts2;
3303 break;
3304 }
3305
3306 case Intrinsic::x86_sse2_pmulh_w:
3307 case Intrinsic::x86_avx2_pmulh_w:
3308 case Intrinsic::x86_avx512_pmulh_w_512:
3309 case Intrinsic::x86_sse2_pmulhu_w:
3310 case Intrinsic::x86_avx2_pmulhu_w:
3311 case Intrinsic::x86_avx512_pmulhu_w_512:
3312 case Intrinsic::x86_ssse3_pmul_hr_sw_128:
3313 case Intrinsic::x86_avx2_pmul_hr_sw:
3314 case Intrinsic::x86_avx512_pmul_hr_sw_512: {
3315 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3316 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3317 // NOTE: mulh(undef, undef) != undef.
3318 break;
3319 }
3320
3321 case Intrinsic::x86_sse2_packssdw_128:
3322 case Intrinsic::x86_sse2_packsswb_128:
3323 case Intrinsic::x86_sse2_packuswb_128:
3324 case Intrinsic::x86_sse41_packusdw:
3325 case Intrinsic::x86_avx2_packssdw:
3326 case Intrinsic::x86_avx2_packsswb:
3327 case Intrinsic::x86_avx2_packusdw:
3328 case Intrinsic::x86_avx2_packuswb:
3329 case Intrinsic::x86_avx512_packssdw_512:
3330 case Intrinsic::x86_avx512_packsswb_512:
3331 case Intrinsic::x86_avx512_packusdw_512:
3332 case Intrinsic::x86_avx512_packuswb_512: {
3333 auto *Ty0 = II.getArgOperand(0)->getType();
3334 unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3335 assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
3336
3337 unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3338 unsigned VWidthPerLane = VWidth / NumLanes;
3339 unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3340
3341 // Per lane, pack the elements of the first input and then the second.
3342 // e.g.
3343 // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
3344 // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
3345 for (int OpNum = 0; OpNum != 2; ++OpNum) {
3346 APInt OpDemandedElts(InnerVWidth, 0);
3347 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3348 unsigned LaneIdx = Lane * VWidthPerLane;
3349 for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3350 unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3351 if (DemandedElts[Idx])
3352 OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
3353 }
3354 }
3355
3356 // Simplify the demanded elements of this operand.
3357 APInt OpUndefElts(InnerVWidth, 0);
3358 simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
3359
3360 // Scatter the operand's undef elements back into the packed result.
3361 OpUndefElts = OpUndefElts.zext(VWidth);
3362 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3363 APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
3364 LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
3365 LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3366 UndefElts |= LaneElts;
3367 }
3368 }
3369 break;
3370 }
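// Illustrative mapping for the loop above (assumed example): for a v32i8
// AVX2 PACKSSWB of v16i16 X and Y, demanding only output byte 20 (lane 1,
// position 4, in the first operand's half) demands only X[12], while
// output bytes 24-31 map to Y[8..15].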
3371
3372 case Intrinsic::x86_sse2_pmadd_wd:
3373 case Intrinsic::x86_avx2_pmadd_wd:
3374 case Intrinsic::x86_avx512_pmaddw_d_512:
3375 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3376 case Intrinsic::x86_avx2_pmadd_ub_sw:
3377 case Intrinsic::x86_avx512_pmaddubs_w_512: {
3378 // PMADD - demand both source elements that map to each result element.
3379 auto *ArgTy = II.getArgOperand(0)->getType();
3380 unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
3381 assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
3382 APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
3383 APInt Op0UndefElts(InnerVWidth, 0);
3384 APInt Op1UndefElts(InnerVWidth, 0);
3385 simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
3386 simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
3387 // NOTE: madd(undef, undef) != undef.
3388 break;
3389 }
3390
3391 // PSHUFB
3392 case Intrinsic::x86_ssse3_pshuf_b_128:
3393 case Intrinsic::x86_avx2_pshuf_b:
3394 case Intrinsic::x86_avx512_pshuf_b_512:
3395 // PERMILVAR
3396 case Intrinsic::x86_avx_vpermilvar_ps:
3397 case Intrinsic::x86_avx_vpermilvar_ps_256:
3398 case Intrinsic::x86_avx512_vpermilvar_ps_512:
3399 case Intrinsic::x86_avx_vpermilvar_pd:
3400 case Intrinsic::x86_avx_vpermilvar_pd_256:
3401 case Intrinsic::x86_avx512_vpermilvar_pd_512:
3402 // PERMV
3403 case Intrinsic::x86_avx2_permd:
3404 case Intrinsic::x86_avx2_permps: {
3405 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3406 break;
3407 }
3408
3409 // SSE4A instructions leave the upper 64-bits of the 128-bit result
3410 // in an undefined state.
3411 case Intrinsic::x86_sse4a_extrq:
3412 case Intrinsic::x86_sse4a_extrqi:
3413 case Intrinsic::x86_sse4a_insertq:
3414 case Intrinsic::x86_sse4a_insertqi:
3415 UndefElts.setHighBits(VWidth / 2);
3416 break;
3417 }
3418 return std::nullopt;
3419}