LLVM: lib/Target/X86/X86InstCombineIntrinsic.cpp Source File


#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "x86tti"
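// Helpers: convert an x86 vector mask to a vector of booleans. A constant
// mask is folded element-wise on the sign bit; a mask built by sign-extending
// an i1 vector is unwrapped back to that boolean vector.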

  assert(V && "Vector must be foldable");
  return V;
}

    return ExtMask;

  return nullptr;
}
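// Rewrite an x86 masked load whose mask is usable as a boolean vector into
// the generic llvm.masked.load intrinsic (the pass-through value is zero).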

  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);

        II.getType(), Ptr, Align(1), BoolMask, ZeroVec);
  }

  return nullptr;
}
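// Rewrite an x86 masked store in the same way into llvm.masked.store; a
// known-zero mask makes the store dead so it can simply be removed.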

  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

    return true;
  }

  // The SSE2 maskmovdqu store is unusual (byte-masked, non-temporal), so
  // leave it alone here.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

    return true;
  }

  return false;
}
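// Fold x86 vector shift-by-immediate and shift-by-scalar intrinsics
// (PSLL/PSRL/PSRA and their immediate forms) to generic IR shifts once the
// shift amount is known.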

114

117 bool LogicalShift = false;

118 bool ShiftLeft = false;

119 bool IsImm = false;

120

121 switch (II.getIntrinsicID()) {

122 default:

124 case Intrinsic::x86_sse2_psrai_d:

125 case Intrinsic::x86_sse2_psrai_w:

126 case Intrinsic::x86_avx2_psrai_d:

127 case Intrinsic::x86_avx2_psrai_w:

128 case Intrinsic::x86_avx512_psrai_q_128:

129 case Intrinsic::x86_avx512_psrai_q_256:

130 case Intrinsic::x86_avx512_psrai_d_512:

131 case Intrinsic::x86_avx512_psrai_q_512:

132 case Intrinsic::x86_avx512_psrai_w_512:

133 IsImm = true;

134 [[fallthrough]];

135 case Intrinsic::x86_sse2_psra_d:

136 case Intrinsic::x86_sse2_psra_w:

137 case Intrinsic::x86_avx2_psra_d:

138 case Intrinsic::x86_avx2_psra_w:

139 case Intrinsic::x86_avx512_psra_q_128:

140 case Intrinsic::x86_avx512_psra_q_256:

141 case Intrinsic::x86_avx512_psra_d_512:

142 case Intrinsic::x86_avx512_psra_q_512:

143 case Intrinsic::x86_avx512_psra_w_512:

144 LogicalShift = false;

145 ShiftLeft = false;

146 break;

147 case Intrinsic::x86_sse2_psrli_d:

148 case Intrinsic::x86_sse2_psrli_q:

149 case Intrinsic::x86_sse2_psrli_w:

150 case Intrinsic::x86_avx2_psrli_d:

151 case Intrinsic::x86_avx2_psrli_q:

152 case Intrinsic::x86_avx2_psrli_w:

153 case Intrinsic::x86_avx512_psrli_d_512:

154 case Intrinsic::x86_avx512_psrli_q_512:

155 case Intrinsic::x86_avx512_psrli_w_512:

156 IsImm = true;

157 [[fallthrough]];

158 case Intrinsic::x86_sse2_psrl_d:

159 case Intrinsic::x86_sse2_psrl_q:

160 case Intrinsic::x86_sse2_psrl_w:

161 case Intrinsic::x86_avx2_psrl_d:

162 case Intrinsic::x86_avx2_psrl_q:

163 case Intrinsic::x86_avx2_psrl_w:

164 case Intrinsic::x86_avx512_psrl_d_512:

165 case Intrinsic::x86_avx512_psrl_q_512:

166 case Intrinsic::x86_avx512_psrl_w_512:

167 LogicalShift = true;

168 ShiftLeft = false;

169 break;

170 case Intrinsic::x86_sse2_pslli_d:

171 case Intrinsic::x86_sse2_pslli_q:

172 case Intrinsic::x86_sse2_pslli_w:

173 case Intrinsic::x86_avx2_pslli_d:

174 case Intrinsic::x86_avx2_pslli_q:

175 case Intrinsic::x86_avx2_pslli_w:

176 case Intrinsic::x86_avx512_pslli_d_512:

177 case Intrinsic::x86_avx512_pslli_q_512:

178 case Intrinsic::x86_avx512_pslli_w_512:

179 IsImm = true;

180 [[fallthrough]];

181 case Intrinsic::x86_sse2_psll_d:

182 case Intrinsic::x86_sse2_psll_q:

183 case Intrinsic::x86_sse2_psll_w:

184 case Intrinsic::x86_avx2_psll_d:

185 case Intrinsic::x86_avx2_psll_q:

186 case Intrinsic::x86_avx2_psll_w:

187 case Intrinsic::x86_avx512_psll_d_512:

188 case Intrinsic::x86_avx512_psll_q_512:

189 case Intrinsic::x86_avx512_psll_w_512:

190 LogicalShift = true;

191 ShiftLeft = true;

192 break;

193 }

  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  Type *SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();

  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
      if (LogicalShift)
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
           "Unexpected shift-by-scalar type");
                               Amt, DemandedLower, II.getDataLayout());
                               Amt, DemandedUpper, II.getDataLayout());
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if the shift amount is a constant vector.
  if (!CDV)
    return nullptr;

         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (LogicalShift)

  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
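// Fold x86 per-element (variable) vector shift intrinsics (PSLLV/PSRLV/PSRAV)
// to generic IR shifts when the shift amounts are known in-range or are
// constants (out-of-range constant amounts are handled per element).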

293

294

295

296

299 bool LogicalShift = false;

300 bool ShiftLeft = false;

301

302 switch (II.getIntrinsicID()) {

303 default:

305 case Intrinsic::x86_avx2_psrav_d:

306 case Intrinsic::x86_avx2_psrav_d_256:

307 case Intrinsic::x86_avx512_psrav_q_128:

308 case Intrinsic::x86_avx512_psrav_q_256:

309 case Intrinsic::x86_avx512_psrav_d_512:

310 case Intrinsic::x86_avx512_psrav_q_512:

311 case Intrinsic::x86_avx512_psrav_w_128:

312 case Intrinsic::x86_avx512_psrav_w_256:

313 case Intrinsic::x86_avx512_psrav_w_512:

314 LogicalShift = false;

315 ShiftLeft = false;

316 break;

317 case Intrinsic::x86_avx2_psrlv_d:

318 case Intrinsic::x86_avx2_psrlv_d_256:

319 case Intrinsic::x86_avx2_psrlv_q:

320 case Intrinsic::x86_avx2_psrlv_q_256:

321 case Intrinsic::x86_avx512_psrlv_d_512:

322 case Intrinsic::x86_avx512_psrlv_q_512:

323 case Intrinsic::x86_avx512_psrlv_w_128:

324 case Intrinsic::x86_avx512_psrlv_w_256:

325 case Intrinsic::x86_avx512_psrlv_w_512:

326 LogicalShift = true;

327 ShiftLeft = false;

328 break;

329 case Intrinsic::x86_avx2_psllv_d:

330 case Intrinsic::x86_avx2_psllv_d_256:

331 case Intrinsic::x86_avx2_psllv_q:

332 case Intrinsic::x86_avx2_psllv_q_256:

333 case Intrinsic::x86_avx512_psllv_d_512:

334 case Intrinsic::x86_avx512_psllv_q_512:

335 case Intrinsic::x86_avx512_psllv_w_128:

336 case Intrinsic::x86_avx512_psllv_w_256:

337 case Intrinsic::x86_avx512_psllv_w_512:

338 LogicalShift = true;

339 ShiftLeft = true;

340 break;

341 }

342 assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

343

344 Value *Vec = II.getArgOperand(0);

345 Value *Amt = II.getArgOperand(1);

347 Type *SVT = VT->getElementType();

348 int NumElts = VT->getNumElements();

350

351

352

356 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)

357 : Builder.CreateLShr(Vec, Amt))

358 : Builder.CreateAShr(Vec, Amt));

359 }

360

361

363 if (!CShift)

364 return nullptr;

365

366

367

368 bool AnyOutOfRange = false;

370 for (int I = 0; I < NumElts; ++I) {

371 auto *CElt = CShift->getAggregateElement(I);

374 continue;

375 }

376

378 if (!COp)

379 return nullptr;

380

381

382

383

384 APInt ShiftVal = COp->getValue();

386 AnyOutOfRange = LogicalShift;

388 continue;

389 }

390

392 }

393

394

395

396 auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };

399 for (int Idx : ShiftAmts) {

400 if (Idx < 0) {

402 } else {

403 assert(LogicalShift && "Logical shift expected");

405 }

406 }

408 }

409

410

411 if (AnyOutOfRange)

412 return nullptr;

413

414

416 for (int Idx : ShiftAmts) {

417 if (Idx < 0)

419 else

420 ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));

421 }

423

424 if (ShiftLeft)

425 return Builder.CreateShl(Vec, ShiftVec);

426

427 if (LogicalShift)

428 return Builder.CreateLShr(Vec, ShiftVec);

429

430 return Builder.CreateAShr(Vec, ShiftVec);

431}
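// Fold x86 PACKSS/PACKUS intrinsics: clamp both sources to the destination
// element range, interleave them per 128-bit lane with a shuffle, then
// truncate to the narrower result type.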

432

435 Value *Arg0 = II.getArgOperand(0);

436 Value *Arg1 = II.getArgOperand(1);

437 Type *ResTy = II.getType();

438

439

442

445 unsigned NumSrcElts = ArgTy->getNumElements();

447 "Unexpected packing types");

448

449 unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;

451 unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();

452 assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&

453 "Unexpected packing types");

454

455

457 return nullptr;

458

459

460

461 APInt MinValue, MaxValue;

462 if (IsSigned) {

463

464

465

466 MinValue =

468 MaxValue =

470 } else {

471

472

473

476 }

477

480 Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);

481 Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);

482 Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);

483 Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

484

485

487 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {

488 for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)

489 PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));

490 for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)

491 PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);

492 }

493 auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

494

495

496 return Builder.CreateTrunc(Shuffle, ResTy);

497}
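// Fold x86 PMULHW/PMULHUW/PMULHRSW: perform the multiply in an extended type,
// apply the PMULHRSW rounding step when required, and return the high half.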

498

501 bool IsRounding) {

502 Value *Arg0 = II.getArgOperand(0);

503 Value *Arg1 = II.getArgOperand(1);

506 assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&

507 "Unexpected PMULH types");

508 assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");

509

510

513

514

517

518

519 if (!IsRounding) {

521 return IsSigned ? Builder.CreateAShr(Arg1, 15)

524 return IsSigned ? Builder.CreateAShr(Arg0, 15)

526 }

527

528

530 return nullptr;

531

532

533 auto Cast =

534 IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;

536 Value *LHS = Builder.CreateCast(Cast, Arg0, ExtTy);

537 Value *RHS = Builder.CreateCast(Cast, Arg1, ExtTy);

539

540 if (IsRounding) {

541

542

545 Mul = Builder.CreateLShr(Mul, 14);

546 Mul = Builder.CreateTrunc(Mul, RndTy);

547 Mul = Builder.CreateAdd(Mul, ConstantInt::get(RndTy, 1));

548 Mul = Builder.CreateLShr(Mul, 1);

549 } else {

550

551 Mul = Builder.CreateLShr(Mul, 16);

552 }

553

554 return Builder.CreateTrunc(Mul, ResTy);

555}
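// Fold x86 PMADDWD/PMADDUBSW: split each operand into the low and high
// elements of every adjacent pair, extend, multiply, and add the two products
// (a saturating add for PMADDUBSW).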

556

559 bool IsPMADDWD) {

560 Value *Arg0 = II.getArgOperand(0);

561 Value *Arg1 = II.getArgOperand(1);

564

565 unsigned NumDstElts = ResTy->getNumElements();

566 assert(ArgTy->getNumElements() == (2 * NumDstElts) &&

567 ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&

568 "Unexpected PMADD types");

569

570

573

574

577

578

580 return nullptr;

581

582

583

584

585

586

588 for (unsigned I = 0; I != NumDstElts; ++I) {

591 }

592

593 auto *LHSLo = Builder.CreateShuffleVector(Arg0, LoMask);

594 auto *LHSHi = Builder.CreateShuffleVector(Arg0, HiMask);

595 auto *RHSLo = Builder.CreateShuffleVector(Arg1, LoMask);

596 auto *RHSHi = Builder.CreateShuffleVector(Arg1, HiMask);

597

598 auto LHSCast =

599 IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;

600 LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);

601 LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);

602 RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);

603 RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);

604 Value *Lo = Builder.CreateMul(LHSLo, RHSLo);

605 Value *Hi = Builder.CreateMul(LHSHi, RHSHi);

606 return IsPMADDWD

607 ? Builder.CreateAdd(Lo, Hi)

608 : Builder.CreateIntrinsic(ResTy, Intrinsic::sadd_sat, {Lo, Hi});

609}
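// Fold x86 MOVMSK/PMOVMSKB: test each element's sign bit and pack the results
// into an integer bitmask.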

610

613 Value *Arg = II.getArgOperand(0);

614 Type *ResTy = II.getType();

615

616

619

620

621

622 if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)

623 return nullptr;

624

626

627

628

629

630

631

632 unsigned NumElts = ArgTy->getNumElements();

634

636 Res = Builder.CreateIsNeg(Res);

637 Res = Builder.CreateBitCast(Res, IntegerTy);

638 Res = Builder.CreateZExtOrTrunc(Res, ResTy);

639 return Res;

640}
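// Fold x86 addcarry with a zero carry-in to a plain llvm.uadd.with.overflow,
// repackaging the carry-out and sum into the intrinsic's result struct.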

641

644 Value *CarryIn = II.getArgOperand(0);

645 Value *Op1 = II.getArgOperand(1);

646 Value *Op2 = II.getArgOperand(2);

647 Type *RetTy = II.getType();

651 "Unexpected types for x86 addcarry");

652

653

655 Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,

656 {Op1, Op2});

657

658 Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);

659 Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),

660 Builder.getInt8Ty());

662 Res = Builder.CreateInsertValue(Res, UAddOV, 0);

663 return Builder.CreateInsertValue(Res, UAddResult, 1);

664 }

665

666 return nullptr;

667}
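// Fold X86 VPTERNLOG: the 8-bit immediate is a truth table over the three
// operands (A = 0xf0, B = 0xcc, C = 0xaa). Rebuild the selected function from
// basic logic ops, but only when the operands involved are constants so the
// expansion folds away instead of adding instructions.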

668

671

673 if (!ArgImm || ArgImm->getValue().uge(256))

674 return nullptr;

675

676 Value *ArgA = II.getArgOperand(0);

677 Value *ArgB = II.getArgOperand(1);

678 Value *ArgC = II.getArgOperand(2);

679

680 Type *Ty = II.getType();

681

682 auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {

683 return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};

684 };

685 auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {

686 return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};

687 };

688 auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {

689 return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};

690 };

691 auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {

692 return {Builder.CreateNot(V.first), ~V.second};

693 };

694 auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };

695 auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };

696 auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

697

701

702 bool ABIsConst = AIsConst && BIsConst;

703 bool ACIsConst = AIsConst && CIsConst;

704 bool BCIsConst = BIsConst && CIsConst;

705 bool ABCIsConst = AIsConst && BIsConst && CIsConst;

706

707

708

709

710

711 std::pair<Value *, uint8_t> A = {ArgA, 0xf0};

712 std::pair<Value *, uint8_t> B = {ArgB, 0xcc};

713 std::pair<Value *, uint8_t> C = {ArgC, 0xaa};

714 std::pair<Value *, uint8_t> Res = {nullptr, 0};

715

716

717

718

719

720

721 uint8_t Imm = ArgImm->getValue().getZExtValue();

722 switch (Imm) {

723 case 0x0:

725 break;

726 case 0x1:

727 if (ABCIsConst)

728 Res = Nor(Or(A, B), C);

729 break;

730 case 0x2:

731 if (ABCIsConst)

732 Res = And(Nor(A, B), C);

733 break;

734 case 0x3:

735 if (ABIsConst)

736 Res = Nor(A, B);

737 break;

738 case 0x4:

739 if (ABCIsConst)

740 Res = And(Nor(A, C), B);

741 break;

742 case 0x5:

743 if (ACIsConst)

744 Res = Nor(A, C);

745 break;

746 case 0x6:

747 if (ABCIsConst)

748 Res = Nor(A, Xnor(B, C));

749 break;

750 case 0x7:

751 if (ABCIsConst)

752 Res = Nor(A, And(B, C));

753 break;

754 case 0x8:

755 if (ABCIsConst)

756 Res = Nor(A, Nand(B, C));

757 break;

758 case 0x9:

759 if (ABCIsConst)

760 Res = Nor(A, Xor(B, C));

761 break;

762 case 0xa:

763 if (ACIsConst)

764 Res = Nor(A, Not(C));

765 break;

766 case 0xb:

767 if (ABCIsConst)

768 Res = Nor(A, Nor(C, Not(B)));

769 break;

770 case 0xc:

771 if (ABIsConst)

772 Res = Nor(A, Not(B));

773 break;

774 case 0xd:

775 if (ABCIsConst)

776 Res = Nor(A, Nor(B, Not(C)));

777 break;

778 case 0xe:

779 if (ABCIsConst)

780 Res = Nor(A, Nor(B, C));

781 break;

782 case 0xf:

783 Res = Not(A);

784 break;

785 case 0x10:

786 if (ABCIsConst)

787 Res = And(A, Nor(B, C));

788 break;

789 case 0x11:

790 if (BCIsConst)

791 Res = Nor(B, C);

792 break;

793 case 0x12:

794 if (ABCIsConst)

795 Res = Nor(Xnor(A, C), B);

796 break;

797 case 0x13:

798 if (ABCIsConst)

799 Res = Nor(And(A, C), B);

800 break;

801 case 0x14:

802 if (ABCIsConst)

803 Res = Nor(Xnor(A, B), C);

804 break;

805 case 0x15:

806 if (ABCIsConst)

807 Res = Nor(And(A, B), C);

808 break;

809 case 0x16:

810 if (ABCIsConst)

812 break;

813 case 0x17:

814 if (ABCIsConst)

816 break;

817 case 0x18:

818 if (ABCIsConst)

819 Res = Nor(Xnor(A, B), Xnor(A, C));

820 break;

821 case 0x19:

822 if (ABCIsConst)

823 Res = And(Nand(A, B), Xnor(B, C));

824 break;

825 case 0x1a:

826 if (ABCIsConst)

828 break;

829 case 0x1b:

830 if (ABCIsConst)

832 break;

833 case 0x1c:

834 if (ABCIsConst)

836 break;

837 case 0x1d:

838 if (ABCIsConst)

840 break;

841 case 0x1e:

842 if (ABCIsConst)

844 break;

845 case 0x1f:

846 if (ABCIsConst)

847 Res = Nand(A, Or(B, C));

848 break;

849 case 0x20:

850 if (ABCIsConst)

851 Res = Nor(Nand(A, C), B);

852 break;

853 case 0x21:

854 if (ABCIsConst)

855 Res = Nor(Xor(A, C), B);

856 break;

857 case 0x22:

858 if (BCIsConst)

859 Res = Nor(B, Not(C));

860 break;

861 case 0x23:

862 if (ABCIsConst)

863 Res = Nor(B, Nor(C, Not(A)));

864 break;

865 case 0x24:

866 if (ABCIsConst)

867 Res = Nor(Xnor(A, B), Xor(A, C));

868 break;

869 case 0x25:

870 if (ABCIsConst)

871 Res = Xor(A, Nand(Nand(A, B), C));

872 break;

873 case 0x26:

874 if (ABCIsConst)

876 break;

877 case 0x27:

878 if (ABCIsConst)

880 break;

881 case 0x28:

882 if (ABCIsConst)

884 break;

885 case 0x29:

886 if (ABCIsConst)

888 break;

889 case 0x2a:

890 if (ABCIsConst)

891 Res = And(Nand(A, B), C);

892 break;

893 case 0x2b:

894 if (ABCIsConst)

896 break;

897 case 0x2c:

898 if (ABCIsConst)

899 Res = Nor(Xnor(A, B), Nor(B, C));

900 break;

901 case 0x2d:

902 if (ABCIsConst)

904 break;

905 case 0x2e:

906 if (ABCIsConst)

908 break;

909 case 0x2f:

910 if (ABCIsConst)

911 Res = Nand(A, Or(B, Not(C)));

912 break;

913 case 0x30:

914 if (ABIsConst)

915 Res = Nor(B, Not(A));

916 break;

917 case 0x31:

918 if (ABCIsConst)

919 Res = Nor(Nor(A, Not(C)), B);

920 break;

921 case 0x32:

922 if (ABCIsConst)

923 Res = Nor(Nor(A, C), B);

924 break;

925 case 0x33:

926 Res = Not(B);

927 break;

928 case 0x34:

929 if (ABCIsConst)

931 break;

932 case 0x35:

933 if (ABCIsConst)

935 break;

936 case 0x36:

937 if (ABCIsConst)

939 break;

940 case 0x37:

941 if (ABCIsConst)

942 Res = Nand(Or(A, C), B);

943 break;

944 case 0x38:

945 if (ABCIsConst)

946 Res = Nor(Xnor(A, B), Nor(A, C));

947 break;

948 case 0x39:

949 if (ABCIsConst)

951 break;

952 case 0x3a:

953 if (ABCIsConst)

955 break;

956 case 0x3b:

957 if (ABCIsConst)

958 Res = Nand(Or(A, Not(C)), B);

959 break;

960 case 0x3c:

962 break;

963 case 0x3d:

964 if (ABCIsConst)

966 break;

967 case 0x3e:

968 if (ABCIsConst)

969 Res = Xor(A, Or(Nor(A, Not(C)), B));

970 break;

971 case 0x3f:

972 if (ABIsConst)

973 Res = Nand(A, B);

974 break;

975 case 0x40:

976 if (ABCIsConst)

977 Res = Nor(Nand(A, B), C);

978 break;

979 case 0x41:

980 if (ABCIsConst)

981 Res = Nor(Xor(A, B), C);

982 break;

983 case 0x42:

984 if (ABCIsConst)

985 Res = Nor(Xor(A, B), Xnor(A, C));

986 break;

987 case 0x43:

988 if (ABCIsConst)

989 Res = Xor(A, Nand(Nand(A, C), B));

990 break;

991 case 0x44:

992 if (BCIsConst)

993 Res = Nor(C, Not(B));

994 break;

995 case 0x45:

996 if (ABCIsConst)

997 Res = Nor(Nor(B, Not(A)), C);

998 break;

999 case 0x46:

1000 if (ABCIsConst)

1002 break;

1003 case 0x47:

1004 if (ABCIsConst)

1005 Res = Xor(Or(Xnor(A, C), B), C);

1006 break;

1007 case 0x48:

1008 if (ABCIsConst)

1010 break;

1011 case 0x49:

1012 if (ABCIsConst)

1014 break;

1015 case 0x4a:

1016 if (ABCIsConst)

1017 Res = Nor(Xnor(A, C), Nor(B, C));

1018 break;

1019 case 0x4b:

1020 if (ABCIsConst)

1021 Res = Xor(A, Or(C, Not(B)));

1022 break;

1023 case 0x4c:

1024 if (ABCIsConst)

1025 Res = And(Nand(A, C), B);

1026 break;

1027 case 0x4d:

1028 if (ABCIsConst)

1030 break;

1031 case 0x4e:

1032 if (ABCIsConst)

1034 break;

1035 case 0x4f:

1036 if (ABCIsConst)

1037 Res = Nand(A, Nand(B, Not(C)));

1038 break;

1039 case 0x50:

1040 if (ACIsConst)

1041 Res = Nor(C, Not(A));

1042 break;

1043 case 0x51:

1044 if (ABCIsConst)

1045 Res = Nor(Nor(A, Not(B)), C);

1046 break;

1047 case 0x52:

1048 if (ABCIsConst)

1050 break;

1051 case 0x53:

1052 if (ABCIsConst)

1053 Res = Xor(Or(Xnor(B, C), A), C);

1054 break;

1055 case 0x54:

1056 if (ABCIsConst)

1057 Res = Nor(Nor(A, B), C);

1058 break;

1059 case 0x55:

1060 Res = Not(C);

1061 break;

1062 case 0x56:

1063 if (ABCIsConst)

1065 break;

1066 case 0x57:

1067 if (ABCIsConst)

1068 Res = Nand(Or(A, B), C);

1069 break;

1070 case 0x58:

1071 if (ABCIsConst)

1072 Res = Nor(Nor(A, B), Xnor(A, C));

1073 break;

1074 case 0x59:

1075 if (ABCIsConst)

1076 Res = Xor(Or(A, Not(B)), C);

1077 break;

1078 case 0x5a:

1079 Res = Xor(A, C);

1080 break;

1081 case 0x5b:

1082 if (ABCIsConst)

1084 break;

1085 case 0x5c:

1086 if (ABCIsConst)

1088 break;

1089 case 0x5d:

1090 if (ABCIsConst)

1091 Res = Nand(Or(A, Not(B)), C);

1092 break;

1093 case 0x5e:

1094 if (ABCIsConst)

1095 Res = Xor(A, Or(Nor(A, Not(B)), C));

1096 break;

1097 case 0x5f:

1098 if (ACIsConst)

1099 Res = Nand(A, C);

1100 break;

1101 case 0x60:

1102 if (ABCIsConst)

1104 break;

1105 case 0x61:

1106 if (ABCIsConst)

1108 break;

1109 case 0x62:

1110 if (ABCIsConst)

1111 Res = Nor(Nor(A, C), Xnor(B, C));

1112 break;

1113 case 0x63:

1114 if (ABCIsConst)

1115 Res = Xor(B, Or(C, Not(A)));

1116 break;

1117 case 0x64:

1118 if (ABCIsConst)

1119 Res = Nor(Nor(A, B), Xnor(B, C));

1120 break;

1121 case 0x65:

1122 if (ABCIsConst)

1123 Res = Xor(Or(B, Not(A)), C);

1124 break;

1125 case 0x66:

1126 Res = Xor(B, C);

1127 break;

1128 case 0x67:

1129 if (ABCIsConst)

1131 break;

1132 case 0x68:

1133 if (ABCIsConst)

1134 Res = Xor(Xor(A, B), Nor(Nor(A, B), C));

1135 break;

1136 case 0x69:

1137 if (ABCIsConst)

1138 Res = Xor(Xnor(A, B), C);

1139 break;

1140 case 0x6a:

1141 if (ABCIsConst)

1143 break;

1144 case 0x6b:

1145 if (ABCIsConst)

1146 Res = Or(Nor(A, B), Xor(Xnor(A, B), C));

1147 break;

1148 case 0x6c:

1149 if (ABCIsConst)

1151 break;

1152 case 0x6d:

1153 if (ABCIsConst)

1154 Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);

1155 break;

1156 case 0x6e:

1157 if (ABCIsConst)

1158 Res = Or(Nor(A, Not(B)), Xor(B, C));

1159 break;

1160 case 0x6f:

1161 if (ABCIsConst)

1162 Res = Nand(A, Xnor(B, C));

1163 break;

1164 case 0x70:

1165 if (ABCIsConst)

1166 Res = And(A, Nand(B, C));

1167 break;

1168 case 0x71:

1169 if (ABCIsConst)

1171 break;

1172 case 0x72:

1173 if (ABCIsConst)

1175 break;

1176 case 0x73:

1177 if (ABCIsConst)

1178 Res = Nand(Nand(A, Not(C)), B);

1179 break;

1180 case 0x74:

1181 if (ABCIsConst)

1183 break;

1184 case 0x75:

1185 if (ABCIsConst)

1186 Res = Nand(Nand(A, Not(B)), C);

1187 break;

1188 case 0x76:

1189 if (ABCIsConst)

1190 Res = Xor(B, Or(Nor(B, Not(A)), C));

1191 break;

1192 case 0x77:

1193 if (BCIsConst)

1194 Res = Nand(B, C);

1195 break;

1196 case 0x78:

1197 if (ABCIsConst)

1199 break;

1200 case 0x79:

1201 if (ABCIsConst)

1202 Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);

1203 break;

1204 case 0x7a:

1205 if (ABCIsConst)

1206 Res = Or(Xor(A, C), Nor(B, Not(A)));

1207 break;

1208 case 0x7b:

1209 if (ABCIsConst)

1210 Res = Nand(Xnor(A, C), B);

1211 break;

1212 case 0x7c:

1213 if (ABCIsConst)

1214 Res = Or(Xor(A, B), Nor(C, Not(A)));

1215 break;

1216 case 0x7d:

1217 if (ABCIsConst)

1218 Res = Nand(Xnor(A, B), C);

1219 break;

1220 case 0x7e:

1221 if (ABCIsConst)

1223 break;

1224 case 0x7f:

1225 if (ABCIsConst)

1226 Res = Nand(And(A, B), C);

1227 break;

1228 case 0x80:

1229 if (ABCIsConst)

1231 break;

1232 case 0x81:

1233 if (ABCIsConst)

1235 break;

1236 case 0x82:

1237 if (ABCIsConst)

1238 Res = And(Xnor(A, B), C);

1239 break;

1240 case 0x83:

1241 if (ABCIsConst)

1242 Res = Nor(Xor(A, B), Nor(C, Not(A)));

1243 break;

1244 case 0x84:

1245 if (ABCIsConst)

1246 Res = And(Xnor(A, C), B);

1247 break;

1248 case 0x85:

1249 if (ABCIsConst)

1250 Res = Nor(Xor(A, C), Nor(B, Not(A)));

1251 break;

1252 case 0x86:

1253 if (ABCIsConst)

1254 Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);

1255 break;

1256 case 0x87:

1257 if (ABCIsConst)

1258 Res = Xor(A, Nand(B, C));

1259 break;

1260 case 0x88:

1261 Res = And(B, C);

1262 break;

1263 case 0x89:

1264 if (ABCIsConst)

1265 Res = Xor(B, Nor(Nor(B, Not(A)), C));

1266 break;

1267 case 0x8a:

1268 if (ABCIsConst)

1269 Res = And(Nand(A, Not(B)), C);

1270 break;

1271 case 0x8b:

1272 if (ABCIsConst)

1274 break;

1275 case 0x8c:

1276 if (ABCIsConst)

1277 Res = And(Nand(A, Not(C)), B);

1278 break;

1279 case 0x8d:

1280 if (ABCIsConst)

1282 break;

1283 case 0x8e:

1284 if (ABCIsConst)

1286 break;

1287 case 0x8f:

1288 if (ABCIsConst)

1289 Res = Nand(A, Nand(B, C));

1290 break;

1291 case 0x90:

1292 if (ABCIsConst)

1293 Res = And(A, Xnor(B, C));

1294 break;

1295 case 0x91:

1296 if (ABCIsConst)

1297 Res = Nor(Nor(A, Not(B)), Xor(B, C));

1298 break;

1299 case 0x92:

1300 if (ABCIsConst)

1301 Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);

1302 break;

1303 case 0x93:

1304 if (ABCIsConst)

1305 Res = Xor(Nand(A, C), B);

1306 break;

1307 case 0x94:

1308 if (ABCIsConst)

1309 Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));

1310 break;

1311 case 0x95:

1312 if (ABCIsConst)

1313 Res = Xor(Nand(A, B), C);

1314 break;

1315 case 0x96:

1316 if (ABCIsConst)

1318 break;

1319 case 0x97:

1320 if (ABCIsConst)

1322 break;

1323 case 0x98:

1324 if (ABCIsConst)

1325 Res = Nor(Nor(A, B), Xor(B, C));

1326 break;

1327 case 0x99:

1328 if (BCIsConst)

1329 Res = Xnor(B, C);

1330 break;

1331 case 0x9a:

1332 if (ABCIsConst)

1333 Res = Xor(Nor(B, Not(A)), C);

1334 break;

1335 case 0x9b:

1336 if (ABCIsConst)

1337 Res = Or(Nor(A, B), Xnor(B, C));

1338 break;

1339 case 0x9c:

1340 if (ABCIsConst)

1341 Res = Xor(B, Nor(C, Not(A)));

1342 break;

1343 case 0x9d:

1344 if (ABCIsConst)

1345 Res = Or(Nor(A, C), Xnor(B, C));

1346 break;

1347 case 0x9e:

1348 if (ABCIsConst)

1350 break;

1351 case 0x9f:

1352 if (ABCIsConst)

1353 Res = Nand(A, Xor(B, C));

1354 break;

1355 case 0xa0:

1356 Res = And(A, C);

1357 break;

1358 case 0xa1:

1359 if (ABCIsConst)

1360 Res = Xor(A, Nor(Nor(A, Not(B)), C));

1361 break;

1362 case 0xa2:

1363 if (ABCIsConst)

1364 Res = And(Or(A, Not(B)), C);

1365 break;

1366 case 0xa3:

1367 if (ABCIsConst)

1369 break;

1370 case 0xa4:

1371 if (ABCIsConst)

1372 Res = Xor(A, Nor(Nor(A, B), C));

1373 break;

1374 case 0xa5:

1375 if (ACIsConst)

1376 Res = Xnor(A, C);

1377 break;

1378 case 0xa6:

1379 if (ABCIsConst)

1380 Res = Xor(Nor(A, Not(B)), C);

1381 break;

1382 case 0xa7:

1383 if (ABCIsConst)

1384 Res = Or(Nor(A, B), Xnor(A, C));

1385 break;

1386 case 0xa8:

1387 if (ABCIsConst)

1389 break;

1390 case 0xa9:

1391 if (ABCIsConst)

1392 Res = Xor(Nor(A, B), C);

1393 break;

1394 case 0xaa:

1395 Res = C;

1396 break;

1397 case 0xab:

1398 if (ABCIsConst)

1399 Res = Or(Nor(A, B), C);

1400 break;

1401 case 0xac:

1402 if (ABCIsConst)

1403 Res = Xor(Nor(Xnor(B, C), A), C);

1404 break;

1405 case 0xad:

1406 if (ABCIsConst)

1407 Res = Or(Xnor(A, C), And(B, C));

1408 break;

1409 case 0xae:

1410 if (ABCIsConst)

1411 Res = Or(Nor(A, Not(B)), C);

1412 break;

1413 case 0xaf:

1414 if (ACIsConst)

1415 Res = Or(C, Not(A));

1416 break;

1417 case 0xb0:

1418 if (ABCIsConst)

1419 Res = And(A, Nand(B, Not(C)));

1420 break;

1421 case 0xb1:

1422 if (ABCIsConst)

1424 break;

1425 case 0xb2:

1426 if (ABCIsConst)

1427 Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);

1428 break;

1429 case 0xb3:

1430 if (ABCIsConst)

1431 Res = Nand(Nand(A, C), B);

1432 break;

1433 case 0xb4:

1434 if (ABCIsConst)

1435 Res = Xor(A, Nor(C, Not(B)));

1436 break;

1437 case 0xb5:

1438 if (ABCIsConst)

1439 Res = Or(Xnor(A, C), Nor(B, C));

1440 break;

1441 case 0xb6:

1442 if (ABCIsConst)

1444 break;

1445 case 0xb7:

1446 if (ABCIsConst)

1447 Res = Nand(Xor(A, C), B);

1448 break;

1449 case 0xb8:

1450 if (ABCIsConst)

1451 Res = Xor(Nor(Xnor(A, C), B), C);

1452 break;

1453 case 0xb9:

1454 if (ABCIsConst)

1456 break;

1457 case 0xba:

1458 if (ABCIsConst)

1459 Res = Or(Nor(B, Not(A)), C);

1460 break;

1461 case 0xbb:

1462 if (BCIsConst)

1463 Res = Or(C, Not(B));

1464 break;

1465 case 0xbc:

1466 if (ABCIsConst)

1468 break;

1469 case 0xbd:

1470 if (ABCIsConst)

1471 Res = Or(Xor(A, B), Xnor(A, C));

1472 break;

1473 case 0xbe:

1474 if (ABCIsConst)

1476 break;

1477 case 0xbf:

1478 if (ABCIsConst)

1479 Res = Or(Nand(A, B), C);

1480 break;

1481 case 0xc0:

1482 Res = And(A, B);

1483 break;

1484 case 0xc1:

1485 if (ABCIsConst)

1486 Res = Xor(A, Nor(Nor(A, Not(C)), B));

1487 break;

1488 case 0xc2:

1489 if (ABCIsConst)

1490 Res = Xor(A, Nor(Nor(A, C), B));

1491 break;

1492 case 0xc3:

1493 if (ABIsConst)

1494 Res = Xnor(A, B);

1495 break;

1496 case 0xc4:

1497 if (ABCIsConst)

1498 Res = And(Or(A, Not(C)), B);

1499 break;

1500 case 0xc5:

1501 if (ABCIsConst)

1503 break;

1504 case 0xc6:

1505 if (ABCIsConst)

1506 Res = Xor(Nor(A, Not(C)), B);

1507 break;

1508 case 0xc7:

1509 if (ABCIsConst)

1510 Res = Or(Xnor(A, B), Nor(A, C));

1511 break;

1512 case 0xc8:

1513 if (ABCIsConst)

1515 break;

1516 case 0xc9:

1517 if (ABCIsConst)

1518 Res = Xor(Nor(A, C), B);

1519 break;

1520 case 0xca:

1521 if (ABCIsConst)

1522 Res = Xor(B, Nor(A, Xnor(B, C)));

1523 break;

1524 case 0xcb:

1525 if (ABCIsConst)

1526 Res = Or(Xnor(A, B), And(B, C));

1527 break;

1528 case 0xcc:

1529 Res = B;

1530 break;

1531 case 0xcd:

1532 if (ABCIsConst)

1533 Res = Or(Nor(A, C), B);

1534 break;

1535 case 0xce:

1536 if (ABCIsConst)

1537 Res = Or(Nor(A, Not(C)), B);

1538 break;

1539 case 0xcf:

1540 if (ABIsConst)

1541 Res = Or(B, Not(A));

1542 break;

1543 case 0xd0:

1544 if (ABCIsConst)

1545 Res = And(A, Or(B, Not(C)));

1546 break;

1547 case 0xd1:

1548 if (ABCIsConst)

1550 break;

1551 case 0xd2:

1552 if (ABCIsConst)

1553 Res = Xor(A, Nor(B, Not(C)));

1554 break;

1555 case 0xd3:

1556 if (ABCIsConst)

1557 Res = Or(Xnor(A, B), Nor(B, C));

1558 break;

1559 case 0xd4:

1560 if (ABCIsConst)

1561 Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);

1562 break;

1563 case 0xd5:

1564 if (ABCIsConst)

1565 Res = Nand(Nand(A, B), C);

1566 break;

1567 case 0xd6:

1568 if (ABCIsConst)

1570 break;

1571 case 0xd7:

1572 if (ABCIsConst)

1573 Res = Nand(Xor(A, B), C);

1574 break;

1575 case 0xd8:

1576 if (ABCIsConst)

1577 Res = Xor(Nor(Xnor(A, B), C), B);

1578 break;

1579 case 0xd9:

1580 if (ABCIsConst)

1581 Res = Or(And(A, B), Xnor(B, C));

1582 break;

1583 case 0xda:

1584 if (ABCIsConst)

1586 break;

1587 case 0xdb:

1588 if (ABCIsConst)

1589 Res = Or(Xnor(A, B), Xor(A, C));

1590 break;

1591 case 0xdc:

1592 if (ABCIsConst)

1593 Res = Or(B, Nor(C, Not(A)));

1594 break;

1595 case 0xdd:

1596 if (BCIsConst)

1597 Res = Or(B, Not(C));

1598 break;

1599 case 0xde:

1600 if (ABCIsConst)

1602 break;

1603 case 0xdf:

1604 if (ABCIsConst)

1605 Res = Or(Nand(A, C), B);

1606 break;

1607 case 0xe0:

1608 if (ABCIsConst)

1610 break;

1611 case 0xe1:

1612 if (ABCIsConst)

1613 Res = Xor(A, Nor(B, C));

1614 break;

1615 case 0xe2:

1616 if (ABCIsConst)

1617 Res = Xor(A, Nor(Xnor(A, C), B));

1618 break;

1619 case 0xe3:

1620 if (ABCIsConst)

1622 break;

1623 case 0xe4:

1624 if (ABCIsConst)

1625 Res = Xor(A, Nor(Xnor(A, B), C));

1626 break;

1627 case 0xe5:

1628 if (ABCIsConst)

1630 break;

1631 case 0xe6:

1632 if (ABCIsConst)

1634 break;

1635 case 0xe7:

1636 if (ABCIsConst)

1637 Res = Or(Xnor(A, B), Xnor(A, C));

1638 break;

1639 case 0xe8:

1640 if (ABCIsConst)

1641 Res = Xor(Or(A, B), Nor(Xnor(A, B), C));

1642 break;

1643 case 0xe9:

1644 if (ABCIsConst)

1645 Res = Xor(Xor(A, B), Nand(Nand(A, B), C));

1646 break;

1647 case 0xea:

1648 if (ABCIsConst)

1650 break;

1651 case 0xeb:

1652 if (ABCIsConst)

1653 Res = Or(Xnor(A, B), C);

1654 break;

1655 case 0xec:

1656 if (ABCIsConst)

1658 break;

1659 case 0xed:

1660 if (ABCIsConst)

1661 Res = Or(Xnor(A, C), B);

1662 break;

1663 case 0xee:

1664 Res = Or(B, C);

1665 break;

1666 case 0xef:

1667 if (ABCIsConst)

1668 Res = Nand(A, Nor(B, C));

1669 break;

1670 case 0xf0:

1671 Res = A;

1672 break;

1673 case 0xf1:

1674 if (ABCIsConst)

1675 Res = Or(A, Nor(B, C));

1676 break;

1677 case 0xf2:

1678 if (ABCIsConst)

1679 Res = Or(A, Nor(B, Not(C)));

1680 break;

1681 case 0xf3:

1682 if (ABIsConst)

1683 Res = Or(A, Not(B));

1684 break;

1685 case 0xf4:

1686 if (ABCIsConst)

1687 Res = Or(A, Nor(C, Not(B)));

1688 break;

1689 case 0xf5:

1690 if (ACIsConst)

1691 Res = Or(A, Not(C));

1692 break;

1693 case 0xf6:

1694 if (ABCIsConst)

1696 break;

1697 case 0xf7:

1698 if (ABCIsConst)

1699 Res = Or(A, Nand(B, C));

1700 break;

1701 case 0xf8:

1702 if (ABCIsConst)

1704 break;

1705 case 0xf9:

1706 if (ABCIsConst)

1707 Res = Or(A, Xnor(B, C));

1708 break;

1709 case 0xfa:

1710 Res = Or(A, C);

1711 break;

1712 case 0xfb:

1713 if (ABCIsConst)

1714 Res = Nand(Nor(A, C), B);

1715 break;

1716 case 0xfc:

1717 Res = Or(A, B);

1718 break;

1719 case 0xfd:

1720 if (ABCIsConst)

1721 Res = Nand(Nor(A, B), C);

1722 break;

1723 case 0xfe:

1724 if (ABCIsConst)

1726 break;

1727 case 0xff:

1729 break;

1730 }

1731

1732 assert((Res.first == nullptr || Res.second == Imm) &&

1733 "Simplification of ternary logic does not verify!");

1734 return Res.first;

1735}
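// Fold SSE4.1 INSERTPS with a constant control byte into a shufflevector
// (or directly into a zero vector when the zero-mask covers all four lanes).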

1736

1740 if (!CInt)

1741 return nullptr;

1742

1744 assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

1745

1746

1747

1748

1749

1750

1751 uint8_t Imm = CInt->getZExtValue();

1752 uint8_t ZMask = Imm & 0xf;

1753 uint8_t DestLane = (Imm >> 4) & 0x3;

1754 uint8_t SourceLane = (Imm >> 6) & 0x3;

1755

1757

1758

1759

1760 if (ZMask == 0xf)

1761 return ZeroVector;

1762

1763

1764 int ShuffleMask[4] = {0, 1, 2, 3};

1765

1766

1767 Value *V1 = II.getArgOperand(1);

1768

1769 if (ZMask) {

1770

1771

1772 if ((II.getArgOperand(0) == II.getArgOperand(1)) ||

1773 (ZMask & (1 << DestLane))) {

1774 V1 = ZeroVector;

1775

1776

1777 ShuffleMask[DestLane] = SourceLane;

1778

1779 for (unsigned i = 0; i < 4; ++i)

1780 if ((ZMask >> i) & 0x1)

1781 ShuffleMask[i] = i + 4;

1782 } else {

1783

1784 return nullptr;

1785 }

1786 } else {

1787

1788 ShuffleMask[DestLane] = SourceLane + 4;

1789 }

1790

1791 return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);

1792}
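// Fold SSE4A EXTRQ/EXTRQI bit-field extraction: byte-aligned fields become a
// shufflevector, fully-constant inputs fold to a constant, and EXTRQ with
// constant length/index operands is converted to EXTRQI.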

1793

1794

1795

1799 auto LowConstantHighUndef = [&](uint64_t Val) {

1801 Constant *Args[] = {ConstantInt::get(IntTy64, Val),

1804 };

1805

1806

1808 auto *CI0 =

1810 : nullptr;

1811

1812

1813 if (CILength && CIIndex) {

1814

1815

1818

1820

1821

1822

1824

1825

1826

1827 unsigned End = Index + Length;

1828

1829

1830

1831

1832

1833 if (End > 64)

1835

1836

1837

1838 if ((Length % 8) == 0 && (Index % 8) == 0) {

1839

1841 Index /= 8;

1842

1845

1847 for (int i = 0; i != (int)Length; ++i)

1848 ShuffleMask.push_back(i + Index);

1849 for (int i = Length; i != 8; ++i)

1850 ShuffleMask.push_back(i + 16);

1851 for (int i = 8; i != 16; ++i)

1852 ShuffleMask.push_back(-1);

1853

1854 Value *SV = Builder.CreateShuffleVector(

1855 Builder.CreateBitCast(Op0, ShufTy),

1857 return Builder.CreateBitCast(SV, II.getType());

1858 }

1859

1860

1861

1862 if (CI0) {

1863 APInt Elt = CI0->getValue();

1866 return LowConstantHighUndef(Elt.getZExtValue());

1867 }

1868

1869

1870 if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {

1871 Value *Args[] = {Op0, CILength, CIIndex};

1872 return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, Args);

1873 }

1874 }

1875

1876

1877 if (CI0 && CI0->isZero())

1878 return LowConstantHighUndef(0);

1879

1880 return nullptr;

1881}
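// Fold SSE4A INSERTQ/INSERTQI bit-field insertion: byte-aligned fields become
// a shufflevector, constant inputs fold, and INSERTQ with constant
// length/index is converted to INSERTQI.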

1882

1883

1884

1888

1889

1892

1893

1895

1896

1897

1899

1900

1901

1902 unsigned End = Index + Length;

1903

1904

1905

1906

1907

1908 if (End > 64)

1910

1911

1912

1913 if ((Length % 8) == 0 && (Index % 8) == 0) {

1914

1916 Index /= 8;

1917

1920

1922 for (int i = 0; i != (int)Index; ++i)

1924 for (int i = 0; i != (int)Length; ++i)

1925 ShuffleMask.push_back(i + 16);

1926 for (int i = Index + Length; i != 8; ++i)

1927 ShuffleMask.push_back(i);

1928 for (int i = 8; i != 16; ++i)

1929 ShuffleMask.push_back(-1);

1930

1931 Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),

1932 Builder.CreateBitCast(Op1, ShufTy),

1933 ShuffleMask);

1934 return Builder.CreateBitCast(SV, II.getType());

1935 }

1936

1937

1940 auto *CI00 =

1942 : nullptr;

1943 auto *CI10 =

1945 : nullptr;

1946

1947

1948 if (CI00 && CI10) {

1949 APInt V00 = CI00->getValue();

1950 APInt V10 = CI10->getValue();

1952 V00 = V00 & ~Mask;

1954 APInt Val = V00 | V10;

1959 }

1960

1961

1962

1963 if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {

1965 Constant *CILength = ConstantInt::get(IntTy8, Length, false);

1966 Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

1967

1968 Value *Args[] = {Op0, Op1, CILength, CIIndex};

1969 return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_insertqi, Args);

1970 }

1971

1972 return nullptr;

1973}
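// Fold SSSE3/AVX2/AVX-512 PSHUFB with a constant mask into a shufflevector;
// an element whose mask byte has the sign bit set selects zero instead.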

1974

1975

1979 if (!V)

1980 return nullptr;

1981

1983 unsigned NumElts = VecTy->getNumElements();

1984 assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&

1985 "Unexpected number of elements in shuffle mask!");

1986

1987

1988 int Indexes[64];

1989

1990

1991

1992 for (unsigned I = 0; I < NumElts; ++I) {

1995 return nullptr;

1996

1998 Indexes[I] = -1;

1999 continue;

2000 }

2001

2002 int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

2003

2004

2005

2006

2007

2008

2009

2010

2011 Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);

2012 Indexes[I] = Index;

2013 }

2014

2015 auto V1 = II.getArgOperand(0);

2017 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));

2018}
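// Fold VPERMILPS/VPERMILPD with a constant control vector into a
// shufflevector that only permutes within each 128-bit lane.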

2019

2020

2024 if (!V)

2025 return nullptr;

2026

2028 unsigned NumElts = VecTy->getNumElements();

2029 bool IsPD = VecTy->getScalarType()->isDoubleTy();

2030 unsigned NumLaneElts = IsPD ? 2 : 4;

2031 assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

2032

2033

2034 int Indexes[16];

2035

2036

2037 for (unsigned I = 0; I < NumElts; ++I) {

2040 return nullptr;

2041

2043 Indexes[I] = -1;

2044 continue;

2045 }

2046

2049

2050

2051

2052 if (IsPD)

2054

2055

2056

2057

2058 Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

2059

2060 Indexes[I] = Index.getZExtValue();

2061 }

2062

2063 auto V1 = II.getArgOperand(0);

2064 return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));

2065}
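// Fold single-source cross-lane permutes (VPERMD/VPERMPS/VPERMW/VPERMB-style)
// with a constant selector into a shufflevector.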

2066

2067

2071 if (!V)

2072 return nullptr;

2073

2075 unsigned Size = VecTy->getNumElements();

2077 "Unexpected shuffle mask size");

2078

2079

2080 int Indexes[64];

2081

2082 for (unsigned I = 0; I < Size; ++I) {

2085 return nullptr;

2086

2088 Indexes[I] = -1;

2089 continue;

2090 }

2091

2093 Index &= Size - 1;

2094 Indexes[I] = Index;

2095 }

2096

2097 auto V1 = II.getArgOperand(0);

2098 return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));

2099}
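// Fold the two-source VPERMI2/VPERMT2 permutes with a constant selector into
// a two-operand shufflevector.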

2100

2101

2105 if (!V)

2106 return nullptr;

2107

2109 unsigned Size = VecTy->getNumElements();

2111 Size == 64) &&

2112 "Unexpected shuffle mask size");

2113

2114

2115 int Indexes[64];

2116

2117 for (unsigned I = 0; I < Size; ++I) {

2120 return nullptr;

2121

2123 Indexes[I] = -1;

2124 continue;

2125 }

2126

2128 Index &= (2 * Size) - 1;

2129 Indexes[I] = Index;

2130 }

2131

2132 auto V1 = II.getArgOperand(0);

2133 auto V2 = II.getArgOperand(2);

2134 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size));

2135}

2136

2137

2141 unsigned EltSizeInBits = VecTy->getScalarSizeInBits();

2142 unsigned NumElts = VecTy->getNumElements();

2144 "Unexpected shuffle mask size");

2145

2146 unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);

2148

2149 KnownBits KnownMask(EltSizeInBits);

2151}

2152

2153std::optional<Instruction *>

2155 auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,

2156 unsigned DemandedWidth) {

2157 APInt UndefElts(Width, 0);

2160 };

2161

2163 switch (IID) {

2164 case Intrinsic::x86_bmi_bextr_32:

2165 case Intrinsic::x86_bmi_bextr_64:

2166 case Intrinsic::x86_tbm_bextri_u32:

2167 case Intrinsic::x86_tbm_bextri_u64:

2168

2170 uint64_t Shift = C->getZExtValue();

2172 Shift &= 0xff;

2173 unsigned BitWidth = II.getType()->getIntegerBitWidth();

2174

2177 }

2178

2180 uint64_t Result = InC->getZExtValue() >> Shift;

2185 ConstantInt::get(II.getType(), Result));

2186 }

2187

2188

2189 }

2190 break;

2191

2192 case Intrinsic::x86_bmi_bzhi_32:

2193 case Intrinsic::x86_bmi_bzhi_64:

2194

2196 uint64_t Index = C->getZExtValue() & 0xff;

2197 unsigned BitWidth = II.getType()->getIntegerBitWidth();

2200 }

2201 if (Index == 0) {

2203 }

2204

2206 uint64_t Result = InC->getZExtValue();

2209 ConstantInt::get(II.getType(), Result));

2210 }

2211

2212 }

2213 break;

2214 case Intrinsic::x86_bmi_pext_32:

2215 case Intrinsic::x86_bmi_pext_64:

2217 if (MaskC->isNullValue()) {

2219 }

2220 if (MaskC->isAllOnesValue()) {

2222 }

2223

2224 unsigned MaskIdx, MaskLen;

2225 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

2226

2227

2228

2231 Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

2234 }

2235

2237 uint64_t Src = SrcC->getZExtValue();

2238 uint64_t Mask = MaskC->getZExtValue();

2241

2242 while (Mask) {

2243

2244 uint64_t BitToTest = Mask & -Mask;

2245 if (BitToTest & Src)

2246 Result |= BitToSet;

2247

2248 BitToSet <<= 1;

2249

2250 Mask &= Mask - 1;

2251 }

2252

2254 ConstantInt::get(II.getType(), Result));

2255 }

2256 }

2257 break;

2258 case Intrinsic::x86_bmi_pdep_32:

2259 case Intrinsic::x86_bmi_pdep_64:

2261 if (MaskC->isNullValue()) {

2263 }

2264 if (MaskC->isAllOnesValue()) {

2266 }

2267

2268 unsigned MaskIdx, MaskLen;

2269 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

2270

2271

2272

2274 Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

2278 }

2279

2281 uint64_t Src = SrcC->getZExtValue();

2282 uint64_t Mask = MaskC->getZExtValue();

2285

2286 while (Mask) {

2287

2288 uint64_t BitToSet = Mask & -Mask;

2289 if (BitToTest & Src)

2290 Result |= BitToSet;

2291

2292 BitToTest <<= 1;

2293

2294 Mask &= Mask - 1;

2295 }

2296

2298 ConstantInt::get(II.getType(), Result));

2299 }

2300 }

2301 break;

2302

2303 case Intrinsic::x86_sse_cvtss2si:

2304 case Intrinsic::x86_sse_cvtss2si64:

2305 case Intrinsic::x86_sse_cvttss2si:

2306 case Intrinsic::x86_sse_cvttss2si64:

2307 case Intrinsic::x86_sse2_cvtsd2si:

2308 case Intrinsic::x86_sse2_cvtsd2si64:

2309 case Intrinsic::x86_sse2_cvttsd2si:

2310 case Intrinsic::x86_sse2_cvttsd2si64:

2311 case Intrinsic::x86_avx512_vcvtss2si32:

2312 case Intrinsic::x86_avx512_vcvtss2si64:

2313 case Intrinsic::x86_avx512_vcvtss2usi32:

2314 case Intrinsic::x86_avx512_vcvtss2usi64:

2315 case Intrinsic::x86_avx512_vcvtsd2si32:

2316 case Intrinsic::x86_avx512_vcvtsd2si64:

2317 case Intrinsic::x86_avx512_vcvtsd2usi32:

2318 case Intrinsic::x86_avx512_vcvtsd2usi64:

2319 case Intrinsic::x86_avx512_cvttss2si:

2320 case Intrinsic::x86_avx512_cvttss2si64:

2321 case Intrinsic::x86_avx512_cvttss2usi:

2322 case Intrinsic::x86_avx512_cvttss2usi64:

2323 case Intrinsic::x86_avx512_cvttsd2si:

2324 case Intrinsic::x86_avx512_cvttsd2si64:

2325 case Intrinsic::x86_avx512_cvttsd2usi:

2326 case Intrinsic::x86_avx512_cvttsd2usi64: {

2327

2328

2329 Value *Arg = II.getArgOperand(0);

2331 if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {

2333 }

2334 break;

2335 }

2336

2337 case Intrinsic::x86_mmx_pmovmskb:

2338 case Intrinsic::x86_sse_movmsk_ps:

2339 case Intrinsic::x86_sse2_movmsk_pd:

2340 case Intrinsic::x86_sse2_pmovmskb_128:

2341 case Intrinsic::x86_avx_movmsk_pd_256:

2342 case Intrinsic::x86_avx_movmsk_ps_256:

2343 case Intrinsic::x86_avx2_pmovmskb:

2346 }

2347 break;

2348

2349 case Intrinsic::x86_sse_comieq_ss:

2350 case Intrinsic::x86_sse_comige_ss:

2351 case Intrinsic::x86_sse_comigt_ss:

2352 case Intrinsic::x86_sse_comile_ss:

2353 case Intrinsic::x86_sse_comilt_ss:

2354 case Intrinsic::x86_sse_comineq_ss:

2355 case Intrinsic::x86_sse_ucomieq_ss:

2356 case Intrinsic::x86_sse_ucomige_ss:

2357 case Intrinsic::x86_sse_ucomigt_ss:

2358 case Intrinsic::x86_sse_ucomile_ss:

2359 case Intrinsic::x86_sse_ucomilt_ss:

2360 case Intrinsic::x86_sse_ucomineq_ss:

2361 case Intrinsic::x86_sse2_comieq_sd:

2362 case Intrinsic::x86_sse2_comige_sd:

2363 case Intrinsic::x86_sse2_comigt_sd:

2364 case Intrinsic::x86_sse2_comile_sd:

2365 case Intrinsic::x86_sse2_comilt_sd:

2366 case Intrinsic::x86_sse2_comineq_sd:

2367 case Intrinsic::x86_sse2_ucomieq_sd:

2368 case Intrinsic::x86_sse2_ucomige_sd:

2369 case Intrinsic::x86_sse2_ucomigt_sd:

2370 case Intrinsic::x86_sse2_ucomile_sd:

2371 case Intrinsic::x86_sse2_ucomilt_sd:

2372 case Intrinsic::x86_sse2_ucomineq_sd:

2373 case Intrinsic::x86_avx512_vcomi_ss:

2374 case Intrinsic::x86_avx512_vcomi_sd:

2375 case Intrinsic::x86_avx512_mask_cmp_ss:

2376 case Intrinsic::x86_avx512_mask_cmp_sd: {

2377

2378

2379 bool MadeChange = false;

2380 Value *Arg0 = II.getArgOperand(0);

2381 Value *Arg1 = II.getArgOperand(1);

2383 if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {

2385 MadeChange = true;

2386 }

2387 if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {

2389 MadeChange = true;

2390 }

2391 if (MadeChange) {

2392 return &II;

2393 }

2394 break;

2395 }

2396

2397 case Intrinsic::x86_avx512_add_ps_512:

2398 case Intrinsic::x86_avx512_div_ps_512:

2399 case Intrinsic::x86_avx512_mul_ps_512:

2400 case Intrinsic::x86_avx512_sub_ps_512:

2401 case Intrinsic::x86_avx512_add_pd_512:

2402 case Intrinsic::x86_avx512_div_pd_512:

2403 case Intrinsic::x86_avx512_mul_pd_512:

2404 case Intrinsic::x86_avx512_sub_pd_512:

2405

2406

2408 if (R->getValue() == 4) {

2409 Value *Arg0 = II.getArgOperand(0);

2410 Value *Arg1 = II.getArgOperand(1);

2411

2413 switch (IID) {

2414 default:

2416 case Intrinsic::x86_avx512_add_ps_512:

2417 case Intrinsic::x86_avx512_add_pd_512:

2419 break;

2420 case Intrinsic::x86_avx512_sub_ps_512:

2421 case Intrinsic::x86_avx512_sub_pd_512:

2423 break;

2424 case Intrinsic::x86_avx512_mul_ps_512:

2425 case Intrinsic::x86_avx512_mul_pd_512:

2427 break;

2428 case Intrinsic::x86_avx512_div_ps_512:

2429 case Intrinsic::x86_avx512_div_pd_512:

2431 break;

2432 }

2433

2435 }

2436 }

2437 break;

2438

2439 case Intrinsic::x86_avx512_mask_add_ss_round:

2440 case Intrinsic::x86_avx512_mask_div_ss_round:

2441 case Intrinsic::x86_avx512_mask_mul_ss_round:

2442 case Intrinsic::x86_avx512_mask_sub_ss_round:

2443 case Intrinsic::x86_avx512_mask_add_sd_round:

2444 case Intrinsic::x86_avx512_mask_div_sd_round:

2445 case Intrinsic::x86_avx512_mask_mul_sd_round:

2446 case Intrinsic::x86_avx512_mask_sub_sd_round:

2447

2448

2450 if (R->getValue() == 4) {

2451

2452 Value *Arg0 = II.getArgOperand(0);

2453 Value *Arg1 = II.getArgOperand(1);

2456

2458 switch (IID) {

2459 default:

2461 case Intrinsic::x86_avx512_mask_add_ss_round:

2462 case Intrinsic::x86_avx512_mask_add_sd_round:

2464 break;

2465 case Intrinsic::x86_avx512_mask_sub_ss_round:

2466 case Intrinsic::x86_avx512_mask_sub_sd_round:

2468 break;

2469 case Intrinsic::x86_avx512_mask_mul_ss_round:

2470 case Intrinsic::x86_avx512_mask_mul_sd_round:

2472 break;

2473 case Intrinsic::x86_avx512_mask_div_ss_round:

2474 case Intrinsic::x86_avx512_mask_div_sd_round:

2476 break;

2477 }

2478

2479

2480 Value *Mask = II.getArgOperand(3);

2482

2483 if (!C || !C->getValue()[0]) {

2484

2490

2491 Value *Passthru =

2494 }

2495

2496

2498

2500 }

2501 }

2502 break;

2503

2504

2505

2506

2507 case Intrinsic::x86_sse2_psrai_d:

2508 case Intrinsic::x86_sse2_psrai_w:

2509 case Intrinsic::x86_avx2_psrai_d:

2510 case Intrinsic::x86_avx2_psrai_w:

2511 case Intrinsic::x86_avx512_psrai_q_128:

2512 case Intrinsic::x86_avx512_psrai_q_256:

2513 case Intrinsic::x86_avx512_psrai_d_512:

2514 case Intrinsic::x86_avx512_psrai_q_512:

2515 case Intrinsic::x86_avx512_psrai_w_512:

2516 case Intrinsic::x86_sse2_psrli_d:

2517 case Intrinsic::x86_sse2_psrli_q:

2518 case Intrinsic::x86_sse2_psrli_w:

2519 case Intrinsic::x86_avx2_psrli_d:

2520 case Intrinsic::x86_avx2_psrli_q:

2521 case Intrinsic::x86_avx2_psrli_w:

2522 case Intrinsic::x86_avx512_psrli_d_512:

2523 case Intrinsic::x86_avx512_psrli_q_512:

2524 case Intrinsic::x86_avx512_psrli_w_512:

2525 case Intrinsic::x86_sse2_pslli_d:

2526 case Intrinsic::x86_sse2_pslli_q:

2527 case Intrinsic::x86_sse2_pslli_w:

2528 case Intrinsic::x86_avx2_pslli_d:

2529 case Intrinsic::x86_avx2_pslli_q:

2530 case Intrinsic::x86_avx2_pslli_w:

2531 case Intrinsic::x86_avx512_pslli_d_512:

2532 case Intrinsic::x86_avx512_pslli_q_512:

2533 case Intrinsic::x86_avx512_pslli_w_512:

2536 }

2537 break;

2538

2539 case Intrinsic::x86_sse2_psra_d:

2540 case Intrinsic::x86_sse2_psra_w:

2541 case Intrinsic::x86_avx2_psra_d:

2542 case Intrinsic::x86_avx2_psra_w:

2543 case Intrinsic::x86_avx512_psra_q_128:

2544 case Intrinsic::x86_avx512_psra_q_256:

2545 case Intrinsic::x86_avx512_psra_d_512:

2546 case Intrinsic::x86_avx512_psra_q_512:

2547 case Intrinsic::x86_avx512_psra_w_512:

2548 case Intrinsic::x86_sse2_psrl_d:

2549 case Intrinsic::x86_sse2_psrl_q:

2550 case Intrinsic::x86_sse2_psrl_w:

2551 case Intrinsic::x86_avx2_psrl_d:

2552 case Intrinsic::x86_avx2_psrl_q:

2553 case Intrinsic::x86_avx2_psrl_w:

2554 case Intrinsic::x86_avx512_psrl_d_512:

2555 case Intrinsic::x86_avx512_psrl_q_512:

2556 case Intrinsic::x86_avx512_psrl_w_512:

2557 case Intrinsic::x86_sse2_psll_d:

2558 case Intrinsic::x86_sse2_psll_q:

2559 case Intrinsic::x86_sse2_psll_w:

2560 case Intrinsic::x86_avx2_psll_d:

2561 case Intrinsic::x86_avx2_psll_q:

2562 case Intrinsic::x86_avx2_psll_w:

2563 case Intrinsic::x86_avx512_psll_d_512:

2564 case Intrinsic::x86_avx512_psll_q_512:

2565 case Intrinsic::x86_avx512_psll_w_512: {

2568 }

2569

2570

2571

2572 Value *Arg1 = II.getArgOperand(1);

2574 "Unexpected packed shift size");

2576

2577 if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {

2579 }

2580 break;

2581 }

2582

2583 case Intrinsic::x86_avx2_psllv_d:

2584 case Intrinsic::x86_avx2_psllv_d_256:

2585 case Intrinsic::x86_avx2_psllv_q:

2586 case Intrinsic::x86_avx2_psllv_q_256:

2587 case Intrinsic::x86_avx512_psllv_d_512:

2588 case Intrinsic::x86_avx512_psllv_q_512:

2589 case Intrinsic::x86_avx512_psllv_w_128:

2590 case Intrinsic::x86_avx512_psllv_w_256:

2591 case Intrinsic::x86_avx512_psllv_w_512:

2592 case Intrinsic::x86_avx2_psrav_d:

2593 case Intrinsic::x86_avx2_psrav_d_256:

2594 case Intrinsic::x86_avx512_psrav_q_128:

2595 case Intrinsic::x86_avx512_psrav_q_256:

2596 case Intrinsic::x86_avx512_psrav_d_512:

2597 case Intrinsic::x86_avx512_psrav_q_512:

2598 case Intrinsic::x86_avx512_psrav_w_128:

2599 case Intrinsic::x86_avx512_psrav_w_256:

2600 case Intrinsic::x86_avx512_psrav_w_512:

2601 case Intrinsic::x86_avx2_psrlv_d:

2602 case Intrinsic::x86_avx2_psrlv_d_256:

2603 case Intrinsic::x86_avx2_psrlv_q:

2604 case Intrinsic::x86_avx2_psrlv_q_256:

2605 case Intrinsic::x86_avx512_psrlv_d_512:

2606 case Intrinsic::x86_avx512_psrlv_q_512:

2607 case Intrinsic::x86_avx512_psrlv_w_128:

2608 case Intrinsic::x86_avx512_psrlv_w_256:

2609 case Intrinsic::x86_avx512_psrlv_w_512:

2612 }

2613 break;

2614

2615 case Intrinsic::x86_sse2_packssdw_128:

2616 case Intrinsic::x86_sse2_packsswb_128:

2617 case Intrinsic::x86_avx2_packssdw:

2618 case Intrinsic::x86_avx2_packsswb:

2619 case Intrinsic::x86_avx512_packssdw_512:

2620 case Intrinsic::x86_avx512_packsswb_512:

2623 }

2624 break;

2625

2626 case Intrinsic::x86_sse2_packuswb_128:

2627 case Intrinsic::x86_sse41_packusdw:

2628 case Intrinsic::x86_avx2_packusdw:

2629 case Intrinsic::x86_avx2_packuswb:

2630 case Intrinsic::x86_avx512_packusdw_512:

2631 case Intrinsic::x86_avx512_packuswb_512:

2634 }

2635 break;

2636

2637 case Intrinsic::x86_sse2_pmulh_w:

2638 case Intrinsic::x86_avx2_pmulh_w:

2639 case Intrinsic::x86_avx512_pmulh_w_512:

2642 }

2643 break;

2644

2645 case Intrinsic::x86_sse2_pmulhu_w:

2646 case Intrinsic::x86_avx2_pmulhu_w:

2647 case Intrinsic::x86_avx512_pmulhu_w_512:

2650 }

2651 break;

2652

2653 case Intrinsic::x86_ssse3_pmul_hr_sw_128:

2654 case Intrinsic::x86_avx2_pmul_hr_sw:

2655 case Intrinsic::x86_avx512_pmul_hr_sw_512:

2658 }

2659 break;

2660

2661 case Intrinsic::x86_sse2_pmadd_wd:

2662 case Intrinsic::x86_avx2_pmadd_wd:

2663 case Intrinsic::x86_avx512_pmaddw_d_512:

2666 }

2667 break;

2668

2669 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:

2670 case Intrinsic::x86_avx2_pmadd_ub_sw:

2671 case Intrinsic::x86_avx512_pmaddubs_w_512:

2674 }

2675 break;

2676

2677 case Intrinsic::x86_pclmulqdq:

2678 case Intrinsic::x86_pclmulqdq_256:

2679 case Intrinsic::x86_pclmulqdq_512: {

2681 unsigned Imm = C->getZExtValue();

2682

2683 bool MadeChange = false;

2684 Value *Arg0 = II.getArgOperand(0);

2685 Value *Arg1 = II.getArgOperand(1);

2686 unsigned VWidth =

2688

2689 APInt UndefElts1(VWidth, 0);

2690 APInt DemandedElts1 =

2695 MadeChange = true;

2696 }

2697

2698 APInt UndefElts2(VWidth, 0);

2699 APInt DemandedElts2 =

2704 MadeChange = true;

2705 }

2706

2707

2708 if (DemandedElts1.isSubsetOf(UndefElts1) ||

2709 DemandedElts2.isSubsetOf(UndefElts2)) {

2712 }

2713

2714 if (MadeChange) {

2715 return &II;

2716 }

2717 }

2718 break;

2719 }

2720

2721 case Intrinsic::x86_sse41_insertps:

2724 }

2725 break;

2726

2727 case Intrinsic::x86_sse4a_extrq: {

2728 Value *Op0 = II.getArgOperand(0);

2729 Value *Op1 = II.getArgOperand(1);

2734 VWidth1 == 16 && "Unexpected operand sizes");

2735

2736

2738 auto *CILength =

2740 : nullptr;

2741 auto *CIIndex =

2743 : nullptr;

2744

2745

2748 }

2749

2750

2751

2752 bool MadeChange = false;

2753 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {

2755 MadeChange = true;

2756 }

2757 if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {

2759 MadeChange = true;

2760 }

2761 if (MadeChange) {

2762 return &II;

2763 }

2764 break;

2765 }

2766

2767 case Intrinsic::x86_sse4a_extrqi: {

2768

2769

2770 Value *Op0 = II.getArgOperand(0);

2773 "Unexpected operand size");

2774

2775

2778

2779

2782 }

2783

2784

2785

2786 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

2788 }

2789 break;

2790 }

2791

2792 case Intrinsic::x86_sse4a_insertq: {

2793 Value *Op0 = II.getArgOperand(0);

2794 Value *Op1 = II.getArgOperand(1);

2799 "Unexpected operand size");

2800

2801

2803 auto *CI11 =

2805 : nullptr;

2806

2807

2808 if (CI11) {

2809 const APInt &V11 = CI11->getValue();

2814 }

2815 }

2816

2817

2818

2819 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

2821 }

2822 break;

2823 }

2824

2825 case Intrinsic::x86_sse4a_insertqi: {

2826

2827

2828

2829 Value *Op0 = II.getArgOperand(0);

2830 Value *Op1 = II.getArgOperand(1);

2835 VWidth1 == 2 && "Unexpected operand sizes");

2836

2837

2840

2841

2842 if (CILength && CIIndex) {

2843 APInt Len = CILength->getValue().zextOrTrunc(6);

2847 }

2848 }

2849

2850

2851

2852 bool MadeChange = false;

2853 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {

2855 MadeChange = true;

2856 }

2857 if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {

2859 MadeChange = true;

2860 }

2861 if (MadeChange) {

2862 return &II;

2863 }

2864 break;

2865 }

2866

2867 case Intrinsic::x86_sse41_pblendvb:

2868 case Intrinsic::x86_sse41_blendvps:

2869 case Intrinsic::x86_sse41_blendvpd:

2870 case Intrinsic::x86_avx_blendv_ps_256:

2871 case Intrinsic::x86_avx_blendv_pd_256:

2872 case Intrinsic::x86_avx2_pblendvb: {

2873

2874 Value *Op0 = II.getArgOperand(0);

2875 Value *Op1 = II.getArgOperand(1);

2876 Value *Mask = II.getArgOperand(2);

2877 if (Op0 == Op1) {

2879 }

2880

2881

2884 }

2885

2886

2891 }

2892

2894

2895

2896

2897

2898 Value *MaskSrc = nullptr;

2901 m_Mask(ShuffleMask))))) {

2902

2904 if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||

2906 [NumElts](int M) { return M < 0 || M >= NumElts; }))

2907 break;

2909 }

2910

2911

2912

2919 unsigned NumMaskElts = MaskTy->getNumElements();

2920 unsigned NumOperandElts = OpTy->getNumElements();

2921

2922

2923 if (MaskSrc) {

2924 unsigned NumMaskSrcElts =

2926 NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;

2927

2928 if (NumMaskElts > NumOperandElts)

2929 break;

2932 break;

2935 }

2936 assert(MaskTy->getPrimitiveSizeInBits() ==

2937 OpTy->getPrimitiveSizeInBits() &&

2938 "Not expecting mask and operands with different sizes");

2939

2940 if (NumMaskElts == NumOperandElts) {

2942 }

2943

2944

2945

2946 if (NumMaskElts < NumOperandElts) {

2951 }

2952 }

2953

2954 break;

2955 }
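Illustrative sketch, not part of the original listing: blendv* takes each result element from the second operand when the sign bit of the corresponding mask element is set, and from the first operand otherwise, so once the mask is known to be a constant or a sign-extended boolean the intrinsic is just a per-element select. A standalone model of that semantic:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<int32_t, 4> Op0 = {1, 2, 3, 4};
  std::array<int32_t, 4> Op1 = {10, 20, 30, 40};
  std::array<int32_t, 4> Mask = {-1, 0, -1, 0}; // only the sign bit matters
  std::array<int32_t, 4> Res;
  for (std::size_t I = 0; I != Res.size(); ++I)
    Res[I] = (Mask[I] < 0) ? Op1[I] : Op0[I]; // select on the sign bit
  for (int32_t V : Res)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}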

2956

2957 case Intrinsic::x86_ssse3_pshuf_b_128:

2958 case Intrinsic::x86_avx2_pshuf_b:

2959 case Intrinsic::x86_avx512_pshuf_b_512: {

2962 }

2963

2966 return &II;

2967 break;

2968 }

2969

2970 case Intrinsic::x86_avx_vpermilvar_ps:

2971 case Intrinsic::x86_avx_vpermilvar_ps_256:

2972 case Intrinsic::x86_avx512_vpermilvar_ps_512: {

2975 }

2976

2979 return &II;

2980 break;

2981 }

2982

2983 case Intrinsic::x86_avx_vpermilvar_pd:

2984 case Intrinsic::x86_avx_vpermilvar_pd_256:

2985 case Intrinsic::x86_avx512_vpermilvar_pd_512: {

2988 }

2989

2992 return &II;

2993 break;

2994 }

2995

2996 case Intrinsic::x86_avx2_permd:

2997 case Intrinsic::x86_avx2_permps:

2998 case Intrinsic::x86_avx512_permvar_df_256:

2999 case Intrinsic::x86_avx512_permvar_df_512:

3000 case Intrinsic::x86_avx512_permvar_di_256:

3001 case Intrinsic::x86_avx512_permvar_di_512:

3002 case Intrinsic::x86_avx512_permvar_hi_128:

3003 case Intrinsic::x86_avx512_permvar_hi_256:

3004 case Intrinsic::x86_avx512_permvar_hi_512:

3005 case Intrinsic::x86_avx512_permvar_qi_128:

3006 case Intrinsic::x86_avx512_permvar_qi_256:

3007 case Intrinsic::x86_avx512_permvar_qi_512:

3008 case Intrinsic::x86_avx512_permvar_sf_512:

3009 case Intrinsic::x86_avx512_permvar_si_512:

3012 }

3014 return &II;

3015 break;
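Illustrative sketch, not part of the original listing: vpermd/vpermps and the permvar variants permute the first operand using a per-element index vector, and with a constant index vector the permutation is a fixed reordering, which is what lets a constant-mask permute be expressed as a fixed shuffle. A standalone sketch for an 8-element permute (only the low bits of each index are used):

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
  std::array<int, 8> Src = {0, 10, 20, 30, 40, 50, 60, 70};
  std::array<int, 8> Idx = {7, 6, 5, 4, 3, 2, 1, 0}; // constant control vector
  std::array<int, 8> Res;
  for (std::size_t I = 0; I != Res.size(); ++I)
    Res[I] = Src[Idx[I] & 7]; // 8 elements, so only the low 3 index bits matter
  for (int V : Res)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}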

3016

3017 case Intrinsic::x86_avx512_vpermi2var_d_128:

3018 case Intrinsic::x86_avx512_vpermi2var_d_256:

3019 case Intrinsic::x86_avx512_vpermi2var_d_512:

3020 case Intrinsic::x86_avx512_vpermi2var_hi_128:

3021 case Intrinsic::x86_avx512_vpermi2var_hi_256:

3022 case Intrinsic::x86_avx512_vpermi2var_hi_512:

3023 case Intrinsic::x86_avx512_vpermi2var_pd_128:

3024 case Intrinsic::x86_avx512_vpermi2var_pd_256:

3025 case Intrinsic::x86_avx512_vpermi2var_pd_512:

3026 case Intrinsic::x86_avx512_vpermi2var_ps_128:

3027 case Intrinsic::x86_avx512_vpermi2var_ps_256:

3028 case Intrinsic::x86_avx512_vpermi2var_ps_512:

3029 case Intrinsic::x86_avx512_vpermi2var_q_128:

3030 case Intrinsic::x86_avx512_vpermi2var_q_256:

3031 case Intrinsic::x86_avx512_vpermi2var_q_512:

3032 case Intrinsic::x86_avx512_vpermi2var_qi_128:

3033 case Intrinsic::x86_avx512_vpermi2var_qi_256:

3034 case Intrinsic::x86_avx512_vpermi2var_qi_512:

3037 }

3039 return &II;

3040 break;

3041

3042 case Intrinsic::x86_avx_maskload_ps:

3043 case Intrinsic::x86_avx_maskload_pd:

3044 case Intrinsic::x86_avx_maskload_ps_256:

3045 case Intrinsic::x86_avx_maskload_pd_256:

3046 case Intrinsic::x86_avx2_maskload_d:

3047 case Intrinsic::x86_avx2_maskload_q:

3048 case Intrinsic::x86_avx2_maskload_d_256:

3049 case Intrinsic::x86_avx2_maskload_q_256:

3051 return I;

3052 }

3053 break;

3054

3055 case Intrinsic::x86_sse2_maskmov_dqu:

3056 case Intrinsic::x86_avx_maskstore_ps:

3057 case Intrinsic::x86_avx_maskstore_pd:

3058 case Intrinsic::x86_avx_maskstore_ps_256:

3059 case Intrinsic::x86_avx_maskstore_pd_256:

3060 case Intrinsic::x86_avx2_maskstore_d:

3061 case Intrinsic::x86_avx2_maskstore_q:

3062 case Intrinsic::x86_avx2_maskstore_d_256:

3063 case Intrinsic::x86_avx2_maskstore_q_256:

3065 return nullptr;

3066 }

3067 break;

3068

3069 case Intrinsic::x86_addcarry_32:

3070 case Intrinsic::x86_addcarry_64:

3073 }

3074 break;
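Illustrative sketch, not part of the original listing: addcarry.32/64 produces a {carry-out, sum} pair for A + B + carry-in, so when the carry-in is known to be zero it can degenerate to a single overflowing add. A standalone model of the semantics being simplified:

#include <cstdint>
#include <cstdio>

struct AddCarry64 {
  uint8_t CarryOut;
  uint64_t Sum;
};

AddCarry64 addcarry64(uint8_t CarryIn, uint64_t A, uint64_t B) {
  uint64_t Sum = A + B;
  uint8_t Carry = Sum < A;                  // carry out of A + B
  uint64_t Total = Sum + (CarryIn ? 1 : 0);
  Carry |= (CarryIn && Total == 0);         // carry out of adding the carry-in
  return {Carry, Total};
}

int main() {
  AddCarry64 R = addcarry64(0, ~UINT64_C(0), 1); // wraps, so CarryOut == 1
  std::printf("carry=%u sum=%llu\n", R.CarryOut, (unsigned long long)R.Sum);
  return 0;
}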

3075

3076 case Intrinsic::x86_avx512_pternlog_d_128:

3077 case Intrinsic::x86_avx512_pternlog_d_256:

3078 case Intrinsic::x86_avx512_pternlog_d_512:

3079 case Intrinsic::x86_avx512_pternlog_q_128:

3080 case Intrinsic::x86_avx512_pternlog_q_256:

3081 case Intrinsic::x86_avx512_pternlog_q_512:

3084 }

3085 break;
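Illustrative sketch, not part of the original listing: the pternlog immediate is a 3-input truth table, so for every bit position the result bit is imm8[(a << 2) | (b << 1) | c]. That is what makes the operation constant-foldable bit by bit and, for well-known table values, equivalent to plain and/or/xor combinations. A standalone bitwise model:

#include <cstdint>
#include <cstdio>

// Evaluate a 3-input truth table (Imm) bitwise over three 64-bit values.
uint64_t ternlog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned Bit = 0; Bit != 64; ++Bit) {
    unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                   ((C >> Bit) & 1);
    R |= (uint64_t)((Imm >> Idx) & 1) << Bit;
  }
  return R;
}

int main() {
  // 0x96 is the truth table of a ^ b ^ c, so this prints 0xf0 ^ 0xcc ^ 0xaa.
  std::printf("%llx\n", (unsigned long long)ternlog(0xF0, 0xCC, 0xAA, 0x96));
  return 0;
}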

3086 default:

3087 break;

3088 }

3089 return std::nullopt;

3090}

3091

3094 bool &KnownBitsComputed) const {

3095 switch (II.getIntrinsicID()) {

3096 default:

3097 break;

3098 case Intrinsic::x86_mmx_pmovmskb:

3099 case Intrinsic::x86_sse_movmsk_ps:

3100 case Intrinsic::x86_sse2_movmsk_pd:

3101 case Intrinsic::x86_sse2_pmovmskb_128:

3102 case Intrinsic::x86_avx_movmsk_ps_256:

3103 case Intrinsic::x86_avx_movmsk_pd_256:

3104 case Intrinsic::x86_avx2_pmovmskb: {

3105

3106

3107 unsigned ArgWidth;

3108 if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {

3109 ArgWidth = 8;

3110 } else {

3112 ArgWidth = ArgType->getNumElements();

3113 }

3114

3115

3116

3118 Type *VTy = II.getType();

3119 if (DemandedElts.isZero()) {

3121 }

3122

3123

3125 KnownBitsComputed = true;

3126 break;

3127 }

3128 }

3129 return std::nullopt;

3130}
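Illustrative sketch, not part of the original listing: the demanded-bits handling above works because movmsk/pmovmskb packs exactly one sign bit per vector element into the low bits of the scalar result, so every bit at or above the element count is known to be zero. A standalone model for the 4-element case:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<int32_t, 4> V = {-1, 2, -3, 4};
  uint32_t Msk = 0;
  for (std::size_t I = 0; I != V.size(); ++I)
    Msk |= (uint32_t)(V[I] < 0) << I; // bit I = sign bit of element I
  // Only the low 4 bits of Msk can ever be set; bits 4..31 are known zero.
  std::printf("mask=0x%x\n", Msk);
  return 0;
}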

3131

3134 APInt &UndefElts2, APInt &UndefElts3,

3136 simplifyAndSetOp) const {

3138 switch (II.getIntrinsicID()) {

3139 default:

3140 break;

3141 case Intrinsic::x86_xop_vfrcz_ss:

3142 case Intrinsic::x86_xop_vfrcz_sd:

3143

3144

3145

3146

3147 if (!DemandedElts[0]) {

3150 }

3151

3152

3153 DemandedElts = 1;

3154 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3155

3156

3157 UndefElts = UndefElts[0];

3158 break;

3159

3160

3161 case Intrinsic::x86_sse_rcp_ss:

3162 case Intrinsic::x86_sse_rsqrt_ss:

3163 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3164

3165

3166 if (!DemandedElts[0]) {

3168 return II.getArgOperand(0);

3169 }

3170

3171

3172 break;

3173

3174

3175

3176

3177 case Intrinsic::x86_sse_min_ss:

3178 case Intrinsic::x86_sse_max_ss:

3179 case Intrinsic::x86_sse_cmp_ss:

3180 case Intrinsic::x86_sse2_min_sd:

3181 case Intrinsic::x86_sse2_max_sd:

3182 case Intrinsic::x86_sse2_cmp_sd: {

3183 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3184

3185

3186 if (!DemandedElts[0]) {

3188 return II.getArgOperand(0);

3189 }

3190

3191

3192 DemandedElts = 1;

3193 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3194

3195

3196

3197 if (!UndefElts2[0])

3199

3200 break;

3201 }
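Illustrative sketch, not part of the original listing: the scalar SSE min/max/cmp forms combine only element 0 of the two operands and pass the upper elements of the first operand through unchanged, which is why only lane 0 of operand 1 is demanded above. A standalone sketch (ignoring minss's exact NaN and ordering rules):

#include <algorithm>
#include <array>
#include <cstdio>

int main() {
  std::array<float, 4> A = {5.0f, 2.0f, 3.0f, 4.0f};
  std::array<float, 4> B = {1.0f, 99.0f, 99.0f, 99.0f};
  std::array<float, 4> R = A;  // lanes 1..3 pass through from the first operand
  R[0] = std::min(A[0], B[0]); // only element 0 of the second operand is read
  for (float V : R)
    std::printf("%g ", V);
  std::printf("\n");
  return 0;
}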

3202

3203

3204

3205 case Intrinsic::x86_sse41_round_ss:

3206 case Intrinsic::x86_sse41_round_sd: {

3207

3208 APInt DemandedElts2 = DemandedElts;

3210 simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

3211

3212

3213 if (!DemandedElts[0]) {

3215 return II.getArgOperand(0);

3216 }

3217

3218

3219 DemandedElts = 1;

3220 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3221

3222

3223

3225 UndefElts |= UndefElts2[0];

3226 break;

3227 }

3228

3229

3230

3231

3232 case Intrinsic::x86_avx512_mask_add_ss_round:

3233 case Intrinsic::x86_avx512_mask_div_ss_round:

3234 case Intrinsic::x86_avx512_mask_mul_ss_round:

3235 case Intrinsic::x86_avx512_mask_sub_ss_round:

3236 case Intrinsic::x86_avx512_mask_max_ss_round:

3237 case Intrinsic::x86_avx512_mask_min_ss_round:

3238 case Intrinsic::x86_avx512_mask_add_sd_round:

3239 case Intrinsic::x86_avx512_mask_div_sd_round:

3240 case Intrinsic::x86_avx512_mask_mul_sd_round:

3241 case Intrinsic::x86_avx512_mask_sub_sd_round:

3242 case Intrinsic::x86_avx512_mask_max_sd_round:

3243 case Intrinsic::x86_avx512_mask_min_sd_round:

3244 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3245

3246

3247 if (!DemandedElts[0]) {

3249 return II.getArgOperand(0);

3250 }

3251

3252

3253 DemandedElts = 1;

3254 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3255 simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

3256

3257

3258

3259 if (!UndefElts2[0] || !UndefElts3[0])

3261 break;

3262

3263

3264 case Intrinsic::x86_sse3_addsub_pd:

3265 case Intrinsic::x86_sse3_addsub_ps:

3266 case Intrinsic::x86_avx_addsub_pd_256:

3267 case Intrinsic::x86_avx_addsub_ps_256: {

3268

3269

3272 bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);

3273 bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);

3274 if (IsSubOnly || IsAddOnly) {

3275 assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");

3278 Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);

3280 IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);

3281 }

3282

3283 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3284 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3285 UndefElts &= UndefElts2;

3286 break;

3287 }
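Illustrative sketch, not part of the original listing: addsub subtracts in the even lanes and adds in the odd lanes, so if the demanded elements are all even the whole intrinsic reduces to an fsub, and if they are all odd it reduces to an fadd, which is the rewrite performed above. A standalone model of the lane pattern:

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
  std::array<double, 4> A = {1.0, 2.0, 3.0, 4.0};
  std::array<double, 4> B = {0.5, 0.5, 0.5, 0.5};
  std::array<double, 4> R;
  for (std::size_t I = 0; I != R.size(); ++I)
    R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I]; // even: sub, odd: add
  for (double V : R)
    std::printf("%g ", V);
  std::printf("\n");
  return 0;
}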

3288

3289

3290 case Intrinsic::x86_avx2_psllv_d:

3291 case Intrinsic::x86_avx2_psllv_d_256:

3292 case Intrinsic::x86_avx2_psllv_q:

3293 case Intrinsic::x86_avx2_psllv_q_256:

3294 case Intrinsic::x86_avx2_psrlv_d:

3295 case Intrinsic::x86_avx2_psrlv_d_256:

3296 case Intrinsic::x86_avx2_psrlv_q:

3297 case Intrinsic::x86_avx2_psrlv_q_256:

3298 case Intrinsic::x86_avx2_psrav_d:

3299 case Intrinsic::x86_avx2_psrav_d_256: {

3300 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3301 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3302 UndefElts &= UndefElts2;

3303 break;

3304 }

3305

3306 case Intrinsic::x86_sse2_pmulh_w:

3307 case Intrinsic::x86_avx2_pmulh_w:

3308 case Intrinsic::x86_avx512_pmulh_w_512:

3309 case Intrinsic::x86_sse2_pmulhu_w:

3310 case Intrinsic::x86_avx2_pmulhu_w:

3311 case Intrinsic::x86_avx512_pmulhu_w_512:

3312 case Intrinsic::x86_ssse3_pmul_hr_sw_128:

3313 case Intrinsic::x86_avx2_pmul_hr_sw:

3314 case Intrinsic::x86_avx512_pmul_hr_sw_512: {

3315 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

3316 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

3317

3318 break;

3319 }

3320

3321 case Intrinsic::x86_sse2_packssdw_128:

3322 case Intrinsic::x86_sse2_packsswb_128:

3323 case Intrinsic::x86_sse2_packuswb_128:

3324 case Intrinsic::x86_sse41_packusdw:

3325 case Intrinsic::x86_avx2_packssdw:

3326 case Intrinsic::x86_avx2_packsswb:

3327 case Intrinsic::x86_avx2_packusdw:

3328 case Intrinsic::x86_avx2_packuswb:

3329 case Intrinsic::x86_avx512_packssdw_512:

3330 case Intrinsic::x86_avx512_packsswb_512:

3331 case Intrinsic::x86_avx512_packusdw_512:

3332 case Intrinsic::x86_avx512_packuswb_512: {

3333 auto *Ty0 = II.getArgOperand(0)->getType();

3335 assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

3336

3337 unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;

3338 unsigned VWidthPerLane = VWidth / NumLanes;

3339 unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

3340

3341

3342

3343

3344

3345 for (int OpNum = 0; OpNum != 2; ++OpNum) {

3346 APInt OpDemandedElts(InnerVWidth, 0);

3347 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {

3348 unsigned LaneIdx = Lane * VWidthPerLane;

3349 for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {

3350 unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;

3351 if (DemandedElts[Idx])

3352 OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);

3353 }

3354 }

3355

3356

3357 APInt OpUndefElts(InnerVWidth, 0);

3358 simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

3359

3360

3361 OpUndefElts = OpUndefElts.zext(VWidth);

3362 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {

3363 APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);

3364 LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);

3365 LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);

3366 UndefElts |= LaneElts;

3367 }

3368 }

3369 break;

3370 }
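Illustrative sketch, not part of the original listing: the per-lane bookkeeping above reflects how the pack instructions lay out their result, with the first half of each 128-bit output lane holding the saturated elements of operand 0's lane and the second half holding operand 1's. A standalone single-lane model of packssdw:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int16_t satS16(int32_t V) {
  return V > INT16_MAX ? INT16_MAX : V < INT16_MIN ? INT16_MIN : (int16_t)V;
}

int main() {
  std::array<int32_t, 4> Op0 = {100000, -100000, 7, -7};
  std::array<int32_t, 4> Op1 = {1, 2, 3, 4};
  std::array<int16_t, 8> Out;
  for (std::size_t I = 0; I != 4; ++I) {
    Out[I] = satS16(Op0[I]);     // first half of the lane: operand 0
    Out[I + 4] = satS16(Op1[I]); // second half of the lane: operand 1
  }
  for (int16_t V : Out)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}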

3371

3372 case Intrinsic::x86_sse2_pmadd_wd:

3373 case Intrinsic::x86_avx2_pmadd_wd:

3374 case Intrinsic::x86_avx512_pmaddw_d_512:

3375 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:

3376 case Intrinsic::x86_avx2_pmadd_ub_sw:

3377 case Intrinsic::x86_avx512_pmaddubs_w_512: {

3378

3379 auto *ArgTy = II.getArgOperand(0)->getType();

3381 assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

3383 APInt Op0UndefElts(InnerVWidth, 0);

3384 APInt Op1UndefElts(InnerVWidth, 0);

3385 simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);

3386 simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);

3387

3388 break;

3389 }
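Illustrative sketch, not part of the original listing: pmaddwd multiplies horizontally adjacent pairs of 16-bit elements and sums each pair into one 32-bit result, so every demanded output element corresponds to exactly two adjacent input elements, matching the widened demanded mask built above. A standalone model:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<int16_t, 8> A = {1, 2, 3, 4, 5, 6, 7, 8};
  std::array<int16_t, 8> B = {1, 1, 2, 2, 3, 3, 4, 4};
  std::array<int32_t, 4> R;
  for (std::size_t I = 0; I != R.size(); ++I)
    R[I] = (int32_t)A[2 * I] * B[2 * I] +
           (int32_t)A[2 * I + 1] * B[2 * I + 1]; // one adjacent pair per output
  for (int32_t V : R)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}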

3390

3391

3392 case Intrinsic::x86_ssse3_pshuf_b_128:

3393 case Intrinsic::x86_avx2_pshuf_b:

3394 case Intrinsic::x86_avx512_pshuf_b_512:

3395

3396 case Intrinsic::x86_avx_vpermilvar_ps:

3397 case Intrinsic::x86_avx_vpermilvar_ps_256:

3398 case Intrinsic::x86_avx512_vpermilvar_ps_512:

3399 case Intrinsic::x86_avx_vpermilvar_pd:

3400 case Intrinsic::x86_avx_vpermilvar_pd_256:

3401 case Intrinsic::x86_avx512_vpermilvar_pd_512:

3402

3403 case Intrinsic::x86_avx2_permd:

3404 case Intrinsic::x86_avx2_permps: {

3405 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);

3406 break;

3407 }

3408

3409

3410

3411 case Intrinsic::x86_sse4a_extrq:

3412 case Intrinsic::x86_sse4a_extrqi:

3413 case Intrinsic::x86_sse4a_insertq:

3414 case Intrinsic::x86_sse4a_insertqi:

3416 break;

3417 }

3418 return std::nullopt;

3419}
