clang: lib/CodeGen/TargetBuiltins/X86.cpp Source File

26 switch (BuiltinID) {

27 default:

28 return std::nullopt;

29 case clang::X86::BI_BitScanForward:

30 case clang::X86::BI_BitScanForward64:

31 return MSVCIntrin::_BitScanForward;

32 case clang::X86::BI_BitScanReverse:

33 case clang::X86::BI_BitScanReverse64:

34 return MSVCIntrin::_BitScanReverse;

35 case clang::X86::BI_InterlockedAnd64:

36 return MSVCIntrin::_InterlockedAnd;

37 case clang::X86::BI_InterlockedCompareExchange128:

38 return MSVCIntrin::_InterlockedCompareExchange128;

39 case clang::X86::BI_InterlockedExchange64:

40 return MSVCIntrin::_InterlockedExchange;

41 case clang::X86::BI_InterlockedExchangeAdd64:

42 return MSVCIntrin::_InterlockedExchangeAdd;

43 case clang::X86::BI_InterlockedExchangeSub64:

44 return MSVCIntrin::_InterlockedExchangeSub;

45 case clang::X86::BI_InterlockedOr64:

46 return MSVCIntrin::_InterlockedOr;

47 case clang::X86::BI_InterlockedXor64:

48 return MSVCIntrin::_InterlockedXor;

49 case clang::X86::BI_InterlockedDecrement64:

50 return MSVCIntrin::_InterlockedDecrement;

51 case clang::X86::BI_InterlockedIncrement64:

52 return MSVCIntrin::_InterlockedIncrement;

53 }

54 llvm_unreachable("must return from switch");

55}
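[Editorial sketch, not part of the original file: how one of the mappings above surfaces to a caller. The demo_ name is mine; assumes an x86-64 MSVC-compatible target.]

#include <intrin.h>

long long demo_interlocked(long long volatile *p) {
  // BI_InterlockedExchangeAdd64 resolves to MSVCIntrin::_InterlockedExchangeAdd;
  // the shared MSVC handler then emits: atomicrmw add ptr %p, i64 1 seq_cst
  return _InterlockedExchangeAdd64(p, 1);
}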

58 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,

59 unsigned NumElts) {

60

61 auto *MaskTy = llvm::FixedVectorType::get(

64 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

65

66

67

68 if (NumElts < 8) {

69 int Indices[4];

70 for (unsigned i = 0; i != NumElts; ++i)

71 Indices[i] = i;

72 MaskVec = CGF.Builder.CreateShuffleVector(

73 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");

74 }

75 return MaskVec;

76}
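[Editorial sketch, not part of the original file: what getMaskVecValue produces for a sub-byte mask. The demo_ name is mine; assumes clang with -mavx512f -mavx512vl.]

#include <immintrin.h>

__m128i demo_mask_vec(__mmask8 k, const void *p) {
  // The i8 mask reaches getMaskVecValue with NumElts == 4, which emits roughly:
  //   %bc = bitcast i8 %k to <8 x i1>
  //   %mv = shufflevector <8 x i1> %bc, <8 x i1> %bc, <4 x i32> <0, 1, 2, 3>
  return _mm_maskz_loadu_epi32(k, p);
}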

78 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,

79 Align Alignment) {

80 Value *Ptr = Ops[0];

81

82 Value *MaskVec = getMaskVecValue(

83 CGF, Ops[2],

84 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());

85

86 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);

87}
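[Editorial sketch, not part of the original file: the builtin-to-intrinsic mapping this helper performs. The demo_ name is mine; assumes -mavx512f -mavx512vl.]

#include <immintrin.h>

void demo_masked_store(void *p, __mmask8 k, __m256i v) {
  // The unaligned mask_storeu form passes Align(1), so this becomes:
  //   call void @llvm.masked.store.v8i32.p0(<8 x i32> %v, ptr %p, i32 1, <8 x i1> %k)
  _mm256_mask_storeu_epi32(p, k, v);
}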

89 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,

90 Align Alignment) {

91 llvm::Type *Ty = Ops[1]->getType();

92 Value *Ptr = Ops[0];

93

94 Value *MaskVec = getMaskVecValue(

95 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());

96

97 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);

98}
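[Editorial sketch, not part of the original file: the load-side counterpart, with Ops[1] doubling as the pass-through value. The demo_ name is mine; assumes -mavx512f -mavx512vl.]

#include <immintrin.h>

__m256i demo_masked_load(__m256i passthru, __mmask8 k, const void *p) {
  // call <8 x i32> @llvm.masked.load.v8i32.p0(ptr %p, i32 1, <8 x i1> %k,
  //                                           <8 x i32> %passthru)
  return _mm256_mask_loadu_epi32(passthru, k, p);
}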

103 Value *Ptr = Ops[0];

104

107

108 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,

109 ResultTy);

110 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });

111}

115 bool IsCompress) {

117

119

120 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress

121 : Intrinsic::x86_avx512_mask_expand;

123 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });

124}

129 Value *Ptr = Ops[0];

130

132

133 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,

134 ResultTy);

135 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });

136}
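[Editorial sketch, not part of the original file: a compress-store caller. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

void demo_compress_store(float *p, __mmask16 k, __m512 v) {
  // call void @llvm.masked.compressstore.v16f32(<16 x float> %v, ptr %p,
  //                                             <16 x i1> %k)
  _mm512_mask_compressstoreu_ps(p, k, v);
}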

140 bool InvertLHS = false) {

141 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();

144

145 if (InvertLHS)

146 LHS = CGF.Builder.CreateNot(LHS);

147

148 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),

149 Ops[0]->getType());

150}
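[Editorial sketch, not part of the original file: the mask-logic helper as seen from a kxor caller. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__mmask16 demo_kxor(__mmask16 a, __mmask16 b) {
  // Both i16 masks are bitcast to <16 x i1>, xor'ed as vectors, and the
  // result is bitcast back to i16.
  return _mm512_kxor(a, b);
}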

153 Value *Amt, bool IsRight) {

154 llvm::Type *Ty = Op0->getType();

155

156

157

158

159 if (Amt->getType() != Ty) {

161 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);

162 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);

163 }

164

165 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;

167 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});

168}
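[Editorial sketch, not part of the original file: rotates reuse this funnel-shift path with Op0 == Op1. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__m512i demo_rol(__m512i x) {
  // The scalar amount is splatted to the vector type, giving:
  //   call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %x,
  //                                     <16 x i32> splat (i32 5))
  return _mm512_rol_epi32(x, 5);
}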

171 bool IsSigned) {

172 Value *Op0 = Ops[0];

173 Value *Op1 = Ops[1];

174 llvm::Type *Ty = Op0->getType();

176

177 CmpInst::Predicate Pred;

178 switch (Imm) {

179 case 0x0:

180 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;

181 break;

182 case 0x1:

183 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;

184 break;

185 case 0x2:

186 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;

187 break;

188 case 0x3:

189 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;

190 break;

191 case 0x4:

192 Pred = ICmpInst::ICMP_EQ;

193 break;

194 case 0x5:

195 Pred = ICmpInst::ICMP_NE;

196 break;

197 case 0x6:

198 return llvm::Constant::getNullValue(Ty);

199 case 0x7:

200 return llvm::Constant::getAllOnesValue(Ty);

201 default:

202 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");

203 }

204

205 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);

206 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);

207 return Res;

208}
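[Editorial sketch, not part of the original file: driving the XOP comparison table directly through the builtin; the __v4si casts are an assumption about the builtin's element types. Assumes -mxop.]

#include <x86intrin.h>

__m128i demo_vpcom_gt(__m128i a, __m128i b) {
  // Imm 0x2 selects ICMP_SGT above; the i1 compare result is
  // sign-extended back to <4 x i32>.
  return (__m128i)__builtin_ia32_vpcomd((__v4si)a, (__v4si)b, 0x2);
}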

212

213

214 if (const auto *C = dyn_cast<Constant>(Mask))

215 if (C->isAllOnesValue())

216 return Op0;

217

220

221 return CGF.Builder.CreateSelect(Mask, Op0, Op1);

222}
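[Editorial sketch, not part of the original file: how the select helper composes with an ordinary masked arithmetic builtin. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__m512i demo_masked_add(__m512i src, __mmask16 k, __m512i a, __m512i b) {
  // The add is emitted unmasked; the select builtin then yields
  //   select <16 x i1> %k, <16 x i32> %sum, <16 x i32> %src
  // and is skipped entirely when the mask is a constant all-ones.
  return _mm512_mask_add_epi32(src, k, a, b);
}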

226

227 if (const auto *C = dyn_cast<Constant>(Mask))

228 if (C->isAllOnesValue())

229 return Op0;

230

231 auto *MaskTy = llvm::FixedVectorType::get(

232 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());

233 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);

234 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);

235 return CGF.Builder.CreateSelect(Mask, Op0, Op1);

236}

239 unsigned NumElts, Value *MaskIn) {

240 if (MaskIn) {

241 const auto *C = dyn_cast<Constant>(MaskIn);

242 if (!C || !C->isAllOnesValue())

243 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));

244 }

245

246 if (NumElts < 8) {

247 int Indices[8];

248 for (unsigned i = 0; i != NumElts; ++i)

249 Indices[i] = i;

250 for (unsigned i = NumElts; i != 8; ++i)

251 Indices[i] = i % NumElts + NumElts;

252 Cmp = CGF.Builder.CreateShuffleVector(

253 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);

254 }

255

256 return CGF.Builder.CreateBitCast(Cmp,

257 IntegerType::get(CGF.getLLVMContext(),

258 std::max(NumElts, 8U)));

259}

263 assert((Ops.size() == 2 || Ops.size() == 4) &&

264 "Unexpected number of arguments");

265 unsigned NumElts =

268

269 if (CC == 3) {

270 Cmp = Constant::getNullValue(

271 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));

272 } else if (CC == 7) {

273 Cmp = Constant::getAllOnesValue(

274 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));

275 } else {

276 ICmpInst::Predicate Pred;

277 switch (CC) {

278 default: llvm_unreachable("Unknown condition code");

279 case 0: Pred = ICmpInst::ICMP_EQ; break;

280 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;

281 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;

282 case 4: Pred = ICmpInst::ICMP_NE; break;

283 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;

284 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;

285 }

286 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);

287 }

288

289 Value *MaskIn = nullptr;

290 if (Ops.size() == 4)

291 MaskIn = Ops[3];

292

293 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);

294}
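[Editorial sketch, not part of the original file: a compare that reaches this helper with CC 6 and Signed set. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__mmask16 demo_cmpgt_mask(__m512i a, __m512i b) {
  // icmp sgt <16 x i32>; at 16 elements the <16 x i1> result is bitcast
  // straight to i16 (padding to 8 lanes only happens for smaller vectors).
  return _mm512_cmpgt_epi32_mask(a, b);
}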

296 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {

297 Value *Zero = Constant::getNullValue(In->getType());

298 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });

299}

304 llvm::Type *Ty = Ops[1]->getType();

305

307 if (Rnd != 4) {

308 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round

309 : Intrinsic::x86_avx512_uitofp_round;

310 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });

311 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });

312 } else {

314 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)

315 : CGF.Builder.CreateUIToFP(Ops[0], Ty);

316 }

317

319}

324 bool IsAddSub) {

325

326 bool Subtract = false;

327 Intrinsic::ID IID = Intrinsic::not_intrinsic;

328 switch (BuiltinID) {

329 default: break;

330 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:

331 Subtract = true;

332 [[fallthrough]];

333 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:

334 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:

335 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:

336 IID = Intrinsic::x86_avx512fp16_vfmadd_ph_512;

337 break;

338 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:

339 Subtract = true;

340 [[fallthrough]];

341 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:

342 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:

343 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:

344 IID = Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;

345 break;

346 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:

347 Subtract = true;

348 [[fallthrough]];

349 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:

350 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:

351 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:

352 IID = Intrinsic::x86_avx512_vfmadd_ps_512; break;

353 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:

354 Subtract = true;

355 [[fallthrough]];

356 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:

357 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:

358 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:

359 IID = Intrinsic::x86_avx512_vfmadd_pd_512; break;

360 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:

361 Subtract = true;

362 [[fallthrough]];

363 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:

364 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:

365 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:

366 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;

367 break;

368 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:

369 Subtract = true;

370 [[fallthrough]];

371 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:

372 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:

373 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:

374 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

375 break;

376 }

377

378 Value *A = Ops[0];

379 Value *B = Ops[1];

381

382 if (Subtract)

384

386

387

388 if (IID != Intrinsic::not_intrinsic &&

390 IsAddSub)) {

392 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });

393 } else {

394 llvm::Type *Ty = A->getType();

395 Function *FMA;

396 if (CGF.Builder.getIsFPConstrained()) {

398 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);

399 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});

400 } else {

402 Res = CGF.Builder.CreateCall(FMA, {A, B, C});

403 }

404 }

405

406

407 Value *MaskFalseVal = nullptr;

408 switch (BuiltinID) {

409 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:

410 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:

411 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:

412 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:

413 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:

414 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:

415 MaskFalseVal = Ops[0];

416 break;

417 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:

418 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:

419 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:

420 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:

421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:

422 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:

423 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());

424 break;

425 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:

426 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:

427 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:

428 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:

429 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:

430 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:

431 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:

432 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:

433 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:

434 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:

435 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:

436 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:

437 MaskFalseVal = Ops[2];

438 break;

439 }

440

441 if (MaskFalseVal)

442 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);

443

444 return Res;

445}
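[Editorial sketch, not part of the original file: a _mask FMA caller. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__m512 demo_mask_fmadd(__m512 a, __mmask16 k, __m512 b, __m512 c) {
  // With the default rounding mode this is llvm.fma.v16f32(%a, %b, %c);
  // the _mask variant then selects the result against Ops[0] (a), per the
  // MaskFalseVal table above.
  return _mm512_mask_fmadd_ps(a, k, b, c);
}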

449 bool ZeroMask = false, unsigned PTIdx = 0,

450 bool NegAcc = false) {

451 unsigned Rnd = 4;

452 if (Ops.size() > 4)

454

455 if (NegAcc)

456 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);

457

458 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);

459 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);

460 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);

462 if (Rnd != 4) {

463 Intrinsic::ID IID;

464

465 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {

466 case 16:

467 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;

468 break;

469 case 32:

470 IID = Intrinsic::x86_avx512_vfmadd_f32;

471 break;

472 case 64:

473 IID = Intrinsic::x86_avx512_vfmadd_f64;

474 break;

475 default:

476 llvm_unreachable("Unexpected size");

477 }

479 {Ops[0], Ops[1], Ops[2], Ops[4]});

480 } else if (CGF.Builder.getIsFPConstrained()) {

483 Intrinsic::experimental_constrained_fma, Ops[0]->getType());

484 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));

485 } else {

487 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));

488 }

489

490 if (Ops.size() > 3) {

491 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())

492 : Ops[PTIdx];

493

494

495

496

497 if (NegAcc && PTIdx == 2)

498 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);

499

501 }

502 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);

503}

507 llvm::Type *Ty = Ops[0]->getType();

508

509 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,

510 Ty->getPrimitiveSizeInBits() / 64);

511 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);

512 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);

513

514 if (IsSigned) {

515

516 Constant *ShiftAmt = ConstantInt::get(Ty, 32);

517 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);

518 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);

519 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);

520 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);

521 } else {

522

523 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);

524 LHS = CGF.Builder.CreateAnd(LHS, Mask);

525 RHS = CGF.Builder.CreateAnd(RHS, Mask);

526 }

527

528 return CGF.Builder.CreateMul(LHS, RHS);

529}
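[Editorial sketch, not part of the original file: the two legalization strategies of this widening-multiply helper. The demo_ name is mine; assumes SSE4.1.]

#include <immintrin.h>

__m128i demo_pmuldq(__m128i a, __m128i b) {
  // Signed path: shl/ashr by 32 on <2 x i64> sign-extends the even 32-bit
  // lanes in place, then a plain mul. _mm_mul_epu32 takes the 0xffffffff
  // and-mask path instead.
  return _mm_mul_epi32(a, b);
}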

536 llvm::Type *Ty = Ops[0]->getType();

537

538 unsigned VecWidth = Ty->getPrimitiveSizeInBits();

539 unsigned EltWidth = Ty->getScalarSizeInBits();

540 Intrinsic::ID IID;

541 if (VecWidth == 128 && EltWidth == 32)

542 IID = Intrinsic::x86_avx512_pternlog_d_128;

543 else if (VecWidth == 256 && EltWidth == 32)

544 IID = Intrinsic::x86_avx512_pternlog_d_256;

545 else if (VecWidth == 512 && EltWidth == 32)

546 IID = Intrinsic::x86_avx512_pternlog_d_512;

547 else if (VecWidth == 128 && EltWidth == 64)

548 IID = Intrinsic::x86_avx512_pternlog_q_128;

549 else if (VecWidth == 256 && EltWidth == 64)

550 IID = Intrinsic::x86_avx512_pternlog_q_256;

551 else if (VecWidth == 512 && EltWidth == 64)

552 IID = Intrinsic::x86_avx512_pternlog_q_512;

553 else

554 llvm_unreachable("Unexpected intrinsic");

555

557 Ops.drop_back());

558 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];

559 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);

560}
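[Editorial sketch, not part of the original file: a ternlog caller; imm 0xE8 is the three-input majority function. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__m512i demo_ternlog_majority(__m512i a, __m512i b, __m512i c) {
  // VecWidth 512 / EltWidth 32 selects llvm.x86.avx512.pternlog.d.512;
  // the immediate rides along as the trailing operand.
  return _mm512_ternarylogic_epi32(a, b, c, 0xE8);
}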

563 llvm::Type *DstTy) {

564 unsigned NumberOfElements =

567 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");

568}

579 llvm::Type *DstTy) {

580 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&

581 "Unknown cvtph2ps intrinsic");

582

583

585 Function *F =

586 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);

587 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});

588 }

589

591 Value *Src = Ops[0];

592

593

594 if (NumDstElts !=

596 assert(NumDstElts == 4 && "Unexpected vector size");

597 Src = CGF.Builder.CreateShuffleVector(Src, {0, 1, 2, 3});

598 }

599

600

601 auto *HalfTy = llvm::FixedVectorType::get(

602 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);

603 Src = CGF.Builder.CreateBitCast(Src, HalfTy);

604

605

606 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");

607

608 if (Ops.size() >= 3)

610 return Res;

611}
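[Editorial sketch, not part of the original file: the unmasked 128-bit half-to-float conversion. The demo_ name is mine; assumes -mf16c.]

#include <immintrin.h>

__m128 demo_cvtph_ps(__m128i h) {
  // Only the 4 low i16 lanes are kept, bitcast to <4 x half>, then
  // fpext'ed to <4 x float> ("cvtph2ps").
  return _mm_cvtph_ps(h);
}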

739 if (BuiltinID == Builtin::BI__builtin_cpu_is)

740 return EmitX86CpuIs(E);

741 if (BuiltinID == Builtin::BI__builtin_cpu_supports)

742 return EmitX86CpuSupports(E);

743 if (BuiltinID == Builtin::BI__builtin_cpu_init)

744 return EmitX86CpuInit();

745

746

747

750

752 bool IsMaskFCmp = false;

753 bool IsConjFMA = false;

754

755

756 unsigned ICEArguments = 0;

760

761 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {

763 }

764

765

766

767

768

769

770

771 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {

772 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));

773 llvm::Function *F = CGM.getIntrinsic(ID);

774 return Builder.CreateCall(F, Ops);

775 };

776

777

778

779

780

781

782 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,

783 bool IsSignaling) {

786 if (IsSignaling)

787 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);

788 else

789 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);

791 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);

792 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);

793 return Builder.CreateBitCast(Sext, FPVecTy);

794 };

795

796 switch (BuiltinID) {

797 default: return nullptr;

798 case X86::BI_mm_prefetch: {

801 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);

802 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);

806 }

807 case X86::BI_m_prefetch:

808 case X86::BI_m_prefetchw: {

810

812 ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);

813 Value *Locality = ConstantInt::get(Int32Ty, 0x3);

817 }

818 case X86::BI_mm_clflush: {

819 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),

820 Ops[0]);

821 }

822 case X86::BI_mm_lfence: {

823 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));

824 }

825 case X86::BI_mm_mfence: {

826 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));

827 }

828 case X86::BI_mm_sfence: {

829 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));

830 }

831 case X86::BI_mm_pause: {

832 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));

833 }

834 case X86::BI__rdtsc: {

835 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));

836 }

837 case X86::BI__builtin_ia32_rdtscp: {

838 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));

839 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),

840 Ops[0]);

841 return Builder.CreateExtractValue(Call, 0);

842 }
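[Editorial sketch, not part of the original file: the rdtscp lowering seen from C. The demo_ name is mine.]

#include <x86intrin.h>

unsigned long long demo_rdtscp(unsigned int *aux) {
  // llvm.x86.rdtscp returns {i64, i32}; element 1 is stored through the
  // pointer argument and element 0 becomes the return value.
  return __rdtscp(aux);
}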

843 case X86::BI__builtin_ia32_lzcnt_u16:

844 case X86::BI__builtin_ia32_lzcnt_u32:

845 case X86::BI__builtin_ia32_lzcnt_u64: {

847 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});

848 }

849 case X86::BI__builtin_ia32_tzcnt_u16:

850 case X86::BI__builtin_ia32_tzcnt_u32:

851 case X86::BI__builtin_ia32_tzcnt_u64: {

853 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});

854 }

855 case X86::BI__builtin_ia32_undef128:

856 case X86::BI__builtin_ia32_undef256:

857 case X86::BI__builtin_ia32_undef512:

858

859

860

861

862

864 case X86::BI__builtin_ia32_vec_ext_v4hi:

865 case X86::BI__builtin_ia32_vec_ext_v16qi:

866 case X86::BI__builtin_ia32_vec_ext_v8hi:

867 case X86::BI__builtin_ia32_vec_ext_v4si:

868 case X86::BI__builtin_ia32_vec_ext_v4sf:

869 case X86::BI__builtin_ia32_vec_ext_v2di:

870 case X86::BI__builtin_ia32_vec_ext_v32qi:

871 case X86::BI__builtin_ia32_vec_ext_v16hi:

872 case X86::BI__builtin_ia32_vec_ext_v8si:

873 case X86::BI__builtin_ia32_vec_ext_v4di: {

874 unsigned NumElts =

877 Index &= NumElts - 1;

878

879

880 return Builder.CreateExtractElement(Ops[0], Index);

881 }

882 case X86::BI__builtin_ia32_vec_set_v4hi:

883 case X86::BI__builtin_ia32_vec_set_v16qi:

884 case X86::BI__builtin_ia32_vec_set_v8hi:

885 case X86::BI__builtin_ia32_vec_set_v4si:

886 case X86::BI__builtin_ia32_vec_set_v2di:

887 case X86::BI__builtin_ia32_vec_set_v32qi:

888 case X86::BI__builtin_ia32_vec_set_v16hi:

889 case X86::BI__builtin_ia32_vec_set_v8si:

890 case X86::BI__builtin_ia32_vec_set_v4di: {

891 unsigned NumElts =

894 Index &= NumElts - 1;

895

896

897 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);

898 }

899 case X86::BI_mm_setcsr:

900 case X86::BI__builtin_ia32_ldmxcsr: {

902 Builder.CreateStore(Ops[0], Tmp);

903 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),

905 }

906 case X86::BI_mm_getcsr:

907 case X86::BI__builtin_ia32_stmxcsr: {

909 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),

911 return Builder.CreateLoad(Tmp, "stmxcsr");

912 }

913 case X86::BI__builtin_ia32_xsave:

914 case X86::BI__builtin_ia32_xsave64:

915 case X86::BI__builtin_ia32_xrstor:

916 case X86::BI__builtin_ia32_xrstor64:

917 case X86::BI__builtin_ia32_xsaveopt:

918 case X86::BI__builtin_ia32_xsaveopt64:

919 case X86::BI__builtin_ia32_xrstors:

920 case X86::BI__builtin_ia32_xrstors64:

921 case X86::BI__builtin_ia32_xsavec:

922 case X86::BI__builtin_ia32_xsavec64:

923 case X86::BI__builtin_ia32_xsaves:

924 case X86::BI__builtin_ia32_xsaves64:

925 case X86::BI__builtin_ia32_xsetbv:

926 case X86::BI_xsetbv: {

927 Intrinsic::ID ID;

928#define INTRINSIC_X86_XSAVE_ID(NAME) \

929 case X86::BI__builtin_ia32_##NAME: \

930 ID = Intrinsic::x86_##NAME; \

931 break

932 switch (BuiltinID) {

933 default: llvm_unreachable("Unsupported intrinsic!");

947 case X86::BI_xsetbv:

948 ID = Intrinsic::x86_xsetbv;

949 break;

950 }

951#undef INTRINSIC_X86_XSAVE_ID

955 Ops[1] = Mhi;

956 Ops.push_back(Mlo);

957 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);

958 }

959 case X86::BI__builtin_ia32_xgetbv:

960 case X86::BI_xgetbv:

961 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);

962 case X86::BI__builtin_ia32_storedqudi128_mask:

963 case X86::BI__builtin_ia32_storedqusi128_mask:

964 case X86::BI__builtin_ia32_storedquhi128_mask:

965 case X86::BI__builtin_ia32_storedquqi128_mask:

966 case X86::BI__builtin_ia32_storeupd128_mask:

967 case X86::BI__builtin_ia32_storeups128_mask:

968 case X86::BI__builtin_ia32_storedqudi256_mask:

969 case X86::BI__builtin_ia32_storedqusi256_mask:

970 case X86::BI__builtin_ia32_storedquhi256_mask:

971 case X86::BI__builtin_ia32_storedquqi256_mask:

972 case X86::BI__builtin_ia32_storeupd256_mask:

973 case X86::BI__builtin_ia32_storeups256_mask:

974 case X86::BI__builtin_ia32_storedqudi512_mask:

975 case X86::BI__builtin_ia32_storedqusi512_mask:

976 case X86::BI__builtin_ia32_storedquhi512_mask:

977 case X86::BI__builtin_ia32_storedquqi512_mask:

978 case X86::BI__builtin_ia32_storeupd512_mask:

979 case X86::BI__builtin_ia32_storeups512_mask:

981

982 case X86::BI__builtin_ia32_storesbf16128_mask:

983 case X86::BI__builtin_ia32_storesh128_mask:

984 case X86::BI__builtin_ia32_storess128_mask:

985 case X86::BI__builtin_ia32_storesd128_mask:

987

988 case X86::BI__builtin_ia32_cvtmask2b128:

989 case X86::BI__builtin_ia32_cvtmask2b256:

990 case X86::BI__builtin_ia32_cvtmask2b512:

991 case X86::BI__builtin_ia32_cvtmask2w128:

992 case X86::BI__builtin_ia32_cvtmask2w256:

993 case X86::BI__builtin_ia32_cvtmask2w512:

994 case X86::BI__builtin_ia32_cvtmask2d128:

995 case X86::BI__builtin_ia32_cvtmask2d256:

996 case X86::BI__builtin_ia32_cvtmask2d512:

997 case X86::BI__builtin_ia32_cvtmask2q128:

998 case X86::BI__builtin_ia32_cvtmask2q256:

999 case X86::BI__builtin_ia32_cvtmask2q512:

1001

1002 case X86::BI__builtin_ia32_cvtb2mask128:

1003 case X86::BI__builtin_ia32_cvtb2mask256:

1004 case X86::BI__builtin_ia32_cvtb2mask512:

1005 case X86::BI__builtin_ia32_cvtw2mask128:

1006 case X86::BI__builtin_ia32_cvtw2mask256:

1007 case X86::BI__builtin_ia32_cvtw2mask512:

1008 case X86::BI__builtin_ia32_cvtd2mask128:

1009 case X86::BI__builtin_ia32_cvtd2mask256:

1010 case X86::BI__builtin_ia32_cvtd2mask512:

1011 case X86::BI__builtin_ia32_cvtq2mask128:

1012 case X86::BI__builtin_ia32_cvtq2mask256:

1013 case X86::BI__builtin_ia32_cvtq2mask512:

1015

1016 case X86::BI__builtin_ia32_cvtdq2ps512_mask:

1017 case X86::BI__builtin_ia32_cvtqq2ps512_mask:

1018 case X86::BI__builtin_ia32_cvtqq2pd512_mask:

1019 case X86::BI__builtin_ia32_vcvtw2ph512_mask:

1020 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:

1021 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:

1023 case X86::BI__builtin_ia32_cvtudq2ps512_mask:

1024 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:

1025 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:

1026 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:

1027 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:

1028 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:

1030

1031 case X86::BI__builtin_ia32_vfmaddsh3_mask:

1032 case X86::BI__builtin_ia32_vfmaddss3_mask:

1033 case X86::BI__builtin_ia32_vfmaddsd3_mask:

1035 case X86::BI__builtin_ia32_vfmaddsh3_maskz:

1036 case X86::BI__builtin_ia32_vfmaddss3_maskz:

1037 case X86::BI__builtin_ia32_vfmaddsd3_maskz:

1038 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], true);

1039 case X86::BI__builtin_ia32_vfmaddsh3_mask3:

1040 case X86::BI__builtin_ia32_vfmaddss3_mask3:

1041 case X86::BI__builtin_ia32_vfmaddsd3_mask3:

1042 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], false, 2);

1043 case X86::BI__builtin_ia32_vfmsubsh3_mask3:

1044 case X86::BI__builtin_ia32_vfmsubss3_mask3:

1045 case X86::BI__builtin_ia32_vfmsubsd3_mask3:

1046 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], false, 2,

1047 true);

1048 case X86::BI__builtin_ia32_vfmaddph512_mask:

1049 case X86::BI__builtin_ia32_vfmaddph512_maskz:

1050 case X86::BI__builtin_ia32_vfmaddph512_mask3:

1051 case X86::BI__builtin_ia32_vfmaddps512_mask:

1052 case X86::BI__builtin_ia32_vfmaddps512_maskz:

1053 case X86::BI__builtin_ia32_vfmaddps512_mask3:

1054 case X86::BI__builtin_ia32_vfmsubps512_mask3:

1055 case X86::BI__builtin_ia32_vfmaddpd512_mask:

1056 case X86::BI__builtin_ia32_vfmaddpd512_maskz:

1057 case X86::BI__builtin_ia32_vfmaddpd512_mask3:

1058 case X86::BI__builtin_ia32_vfmsubpd512_mask3:

1059 case X86::BI__builtin_ia32_vfmsubph512_mask3:

1060 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, false);

1061 case X86::BI__builtin_ia32_vfmaddsubph512_mask:

1062 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:

1063 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:

1064 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:

1065 case X86::BI__builtin_ia32_vfmaddsubps512_mask:

1066 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:

1067 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:

1068 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:

1069 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:

1070 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:

1071 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:

1072 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:

1073 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, true);

1074

1075 case X86::BI__builtin_ia32_movdqa32store128_mask:

1076 case X86::BI__builtin_ia32_movdqa64store128_mask:

1077 case X86::BI__builtin_ia32_storeaps128_mask:

1078 case X86::BI__builtin_ia32_storeapd128_mask:

1079 case X86::BI__builtin_ia32_movdqa32store256_mask:

1080 case X86::BI__builtin_ia32_movdqa64store256_mask:

1081 case X86::BI__builtin_ia32_storeaps256_mask:

1082 case X86::BI__builtin_ia32_storeapd256_mask:

1083 case X86::BI__builtin_ia32_movdqa32store512_mask:

1084 case X86::BI__builtin_ia32_movdqa64store512_mask:

1085 case X86::BI__builtin_ia32_storeaps512_mask:

1086 case X86::BI__builtin_ia32_storeapd512_mask:

1087 return EmitX86MaskedStore(

1088 *this, Ops,

1089 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());

1090

1091 case X86::BI__builtin_ia32_loadups128_mask:

1092 case X86::BI__builtin_ia32_loadups256_mask:

1093 case X86::BI__builtin_ia32_loadups512_mask:

1094 case X86::BI__builtin_ia32_loadupd128_mask:

1095 case X86::BI__builtin_ia32_loadupd256_mask:

1096 case X86::BI__builtin_ia32_loadupd512_mask:

1097 case X86::BI__builtin_ia32_loaddquqi128_mask:

1098 case X86::BI__builtin_ia32_loaddquqi256_mask:

1099 case X86::BI__builtin_ia32_loaddquqi512_mask:

1100 case X86::BI__builtin_ia32_loaddquhi128_mask:

1101 case X86::BI__builtin_ia32_loaddquhi256_mask:

1102 case X86::BI__builtin_ia32_loaddquhi512_mask:

1103 case X86::BI__builtin_ia32_loaddqusi128_mask:

1104 case X86::BI__builtin_ia32_loaddqusi256_mask:

1105 case X86::BI__builtin_ia32_loaddqusi512_mask:

1106 case X86::BI__builtin_ia32_loaddqudi128_mask:

1107 case X86::BI__builtin_ia32_loaddqudi256_mask:

1108 case X86::BI__builtin_ia32_loaddqudi512_mask:

1110

1111 case X86::BI__builtin_ia32_loadsbf16128_mask:

1112 case X86::BI__builtin_ia32_loadsh128_mask:

1113 case X86::BI__builtin_ia32_loadss128_mask:

1114 case X86::BI__builtin_ia32_loadsd128_mask:

1116

1117 case X86::BI__builtin_ia32_loadaps128_mask:

1118 case X86::BI__builtin_ia32_loadaps256_mask:

1119 case X86::BI__builtin_ia32_loadaps512_mask:

1120 case X86::BI__builtin_ia32_loadapd128_mask:

1121 case X86::BI__builtin_ia32_loadapd256_mask:

1122 case X86::BI__builtin_ia32_loadapd512_mask:

1123 case X86::BI__builtin_ia32_movdqa32load128_mask:

1124 case X86::BI__builtin_ia32_movdqa32load256_mask:

1125 case X86::BI__builtin_ia32_movdqa32load512_mask:

1126 case X86::BI__builtin_ia32_movdqa64load128_mask:

1127 case X86::BI__builtin_ia32_movdqa64load256_mask:

1128 case X86::BI__builtin_ia32_movdqa64load512_mask:

1129 return EmitX86MaskedLoad(

1130 *this, Ops,

1131 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());

1132

1133 case X86::BI__builtin_ia32_expandloaddf128_mask:

1134 case X86::BI__builtin_ia32_expandloaddf256_mask:

1135 case X86::BI__builtin_ia32_expandloaddf512_mask:

1136 case X86::BI__builtin_ia32_expandloadsf128_mask:

1137 case X86::BI__builtin_ia32_expandloadsf256_mask:

1138 case X86::BI__builtin_ia32_expandloadsf512_mask:

1139 case X86::BI__builtin_ia32_expandloaddi128_mask:

1140 case X86::BI__builtin_ia32_expandloaddi256_mask:

1141 case X86::BI__builtin_ia32_expandloaddi512_mask:

1142 case X86::BI__builtin_ia32_expandloadsi128_mask:

1143 case X86::BI__builtin_ia32_expandloadsi256_mask:

1144 case X86::BI__builtin_ia32_expandloadsi512_mask:

1145 case X86::BI__builtin_ia32_expandloadhi128_mask:

1146 case X86::BI__builtin_ia32_expandloadhi256_mask:

1147 case X86::BI__builtin_ia32_expandloadhi512_mask:

1148 case X86::BI__builtin_ia32_expandloadqi128_mask:

1149 case X86::BI__builtin_ia32_expandloadqi256_mask:

1150 case X86::BI__builtin_ia32_expandloadqi512_mask:

1152

1153 case X86::BI__builtin_ia32_compressstoredf128_mask:

1154 case X86::BI__builtin_ia32_compressstoredf256_mask:

1155 case X86::BI__builtin_ia32_compressstoredf512_mask:

1156 case X86::BI__builtin_ia32_compressstoresf128_mask:

1157 case X86::BI__builtin_ia32_compressstoresf256_mask:

1158 case X86::BI__builtin_ia32_compressstoresf512_mask:

1159 case X86::BI__builtin_ia32_compressstoredi128_mask:

1160 case X86::BI__builtin_ia32_compressstoredi256_mask:

1161 case X86::BI__builtin_ia32_compressstoredi512_mask:

1162 case X86::BI__builtin_ia32_compressstoresi128_mask:

1163 case X86::BI__builtin_ia32_compressstoresi256_mask:

1164 case X86::BI__builtin_ia32_compressstoresi512_mask:

1165 case X86::BI__builtin_ia32_compressstorehi128_mask:

1166 case X86::BI__builtin_ia32_compressstorehi256_mask:

1167 case X86::BI__builtin_ia32_compressstorehi512_mask:

1168 case X86::BI__builtin_ia32_compressstoreqi128_mask:

1169 case X86::BI__builtin_ia32_compressstoreqi256_mask:

1170 case X86::BI__builtin_ia32_compressstoreqi512_mask:

1172

1173 case X86::BI__builtin_ia32_expanddf128_mask:

1174 case X86::BI__builtin_ia32_expanddf256_mask:

1175 case X86::BI__builtin_ia32_expanddf512_mask:

1176 case X86::BI__builtin_ia32_expandsf128_mask:

1177 case X86::BI__builtin_ia32_expandsf256_mask:

1178 case X86::BI__builtin_ia32_expandsf512_mask:

1179 case X86::BI__builtin_ia32_expanddi128_mask:

1180 case X86::BI__builtin_ia32_expanddi256_mask:

1181 case X86::BI__builtin_ia32_expanddi512_mask:

1182 case X86::BI__builtin_ia32_expandsi128_mask:

1183 case X86::BI__builtin_ia32_expandsi256_mask:

1184 case X86::BI__builtin_ia32_expandsi512_mask:

1185 case X86::BI__builtin_ia32_expandhi128_mask:

1186 case X86::BI__builtin_ia32_expandhi256_mask:

1187 case X86::BI__builtin_ia32_expandhi512_mask:

1188 case X86::BI__builtin_ia32_expandqi128_mask:

1189 case X86::BI__builtin_ia32_expandqi256_mask:

1190 case X86::BI__builtin_ia32_expandqi512_mask:

1192

1193 case X86::BI__builtin_ia32_compressdf128_mask:

1194 case X86::BI__builtin_ia32_compressdf256_mask:

1195 case X86::BI__builtin_ia32_compressdf512_mask:

1196 case X86::BI__builtin_ia32_compresssf128_mask:

1197 case X86::BI__builtin_ia32_compresssf256_mask:

1198 case X86::BI__builtin_ia32_compresssf512_mask:

1199 case X86::BI__builtin_ia32_compressdi128_mask:

1200 case X86::BI__builtin_ia32_compressdi256_mask:

1201 case X86::BI__builtin_ia32_compressdi512_mask:

1202 case X86::BI__builtin_ia32_compresssi128_mask:

1203 case X86::BI__builtin_ia32_compresssi256_mask:

1204 case X86::BI__builtin_ia32_compresssi512_mask:

1205 case X86::BI__builtin_ia32_compresshi128_mask:

1206 case X86::BI__builtin_ia32_compresshi256_mask:

1207 case X86::BI__builtin_ia32_compresshi512_mask:

1208 case X86::BI__builtin_ia32_compressqi128_mask:

1209 case X86::BI__builtin_ia32_compressqi256_mask:

1210 case X86::BI__builtin_ia32_compressqi512_mask:

1212

1213 case X86::BI__builtin_ia32_gather3div2df:

1214 case X86::BI__builtin_ia32_gather3div2di:

1215 case X86::BI__builtin_ia32_gather3div4df:

1216 case X86::BI__builtin_ia32_gather3div4di:

1217 case X86::BI__builtin_ia32_gather3div4sf:

1218 case X86::BI__builtin_ia32_gather3div4si:

1219 case X86::BI__builtin_ia32_gather3div8sf:

1220 case X86::BI__builtin_ia32_gather3div8si:

1221 case X86::BI__builtin_ia32_gather3siv2df:

1222 case X86::BI__builtin_ia32_gather3siv2di:

1223 case X86::BI__builtin_ia32_gather3siv4df:

1224 case X86::BI__builtin_ia32_gather3siv4di:

1225 case X86::BI__builtin_ia32_gather3siv4sf:

1226 case X86::BI__builtin_ia32_gather3siv4si:

1227 case X86::BI__builtin_ia32_gather3siv8sf:

1228 case X86::BI__builtin_ia32_gather3siv8si:

1229 case X86::BI__builtin_ia32_gathersiv8df:

1230 case X86::BI__builtin_ia32_gathersiv16sf:

1231 case X86::BI__builtin_ia32_gatherdiv8df:

1232 case X86::BI__builtin_ia32_gatherdiv16sf:

1233 case X86::BI__builtin_ia32_gathersiv8di:

1234 case X86::BI__builtin_ia32_gathersiv16si:

1235 case X86::BI__builtin_ia32_gatherdiv8di:

1236 case X86::BI__builtin_ia32_gatherdiv16si: {

1237 Intrinsic::ID IID;

1238 switch (BuiltinID) {

1239 default: llvm_unreachable("Unexpected builtin");

1240 case X86::BI__builtin_ia32_gather3div2df:

1241 IID = Intrinsic::x86_avx512_mask_gather3div2_df;

1242 break;

1243 case X86::BI__builtin_ia32_gather3div2di:

1244 IID = Intrinsic::x86_avx512_mask_gather3div2_di;

1245 break;

1246 case X86::BI__builtin_ia32_gather3div4df:

1247 IID = Intrinsic::x86_avx512_mask_gather3div4_df;

1248 break;

1249 case X86::BI__builtin_ia32_gather3div4di:

1250 IID = Intrinsic::x86_avx512_mask_gather3div4_di;

1251 break;

1252 case X86::BI__builtin_ia32_gather3div4sf:

1253 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;

1254 break;

1255 case X86::BI__builtin_ia32_gather3div4si:

1256 IID = Intrinsic::x86_avx512_mask_gather3div4_si;

1257 break;

1258 case X86::BI__builtin_ia32_gather3div8sf:

1259 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;

1260 break;

1261 case X86::BI__builtin_ia32_gather3div8si:

1262 IID = Intrinsic::x86_avx512_mask_gather3div8_si;

1263 break;

1264 case X86::BI__builtin_ia32_gather3siv2df:

1265 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;

1266 break;

1267 case X86::BI__builtin_ia32_gather3siv2di:

1268 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;

1269 break;

1270 case X86::BI__builtin_ia32_gather3siv4df:

1271 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;

1272 break;

1273 case X86::BI__builtin_ia32_gather3siv4di:

1274 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;

1275 break;

1276 case X86::BI__builtin_ia32_gather3siv4sf:

1277 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;

1278 break;

1279 case X86::BI__builtin_ia32_gather3siv4si:

1280 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;

1281 break;

1282 case X86::BI__builtin_ia32_gather3siv8sf:

1283 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;

1284 break;

1285 case X86::BI__builtin_ia32_gather3siv8si:

1286 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;

1287 break;

1288 case X86::BI__builtin_ia32_gathersiv8df:

1289 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;

1290 break;

1291 case X86::BI__builtin_ia32_gathersiv16sf:

1292 IID = Intrinsic::x86_avx512_mask_gather_dps_512;

1293 break;

1294 case X86::BI__builtin_ia32_gatherdiv8df:

1295 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;

1296 break;

1297 case X86::BI__builtin_ia32_gatherdiv16sf:

1298 IID = Intrinsic::x86_avx512_mask_gather_qps_512;

1299 break;

1300 case X86::BI__builtin_ia32_gathersiv8di:

1301 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;

1302 break;

1303 case X86::BI__builtin_ia32_gathersiv16si:

1304 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;

1305 break;

1306 case X86::BI__builtin_ia32_gatherdiv8di:

1307 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;

1308 break;

1309 case X86::BI__builtin_ia32_gatherdiv16si:

1310 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;

1311 break;

1312 }

1313

1314 unsigned MinElts = std::min(

1319 return Builder.CreateCall(Intr, Ops);

1320 }

1321

1322 case X86::BI__builtin_ia32_scattersiv8df:

1323 case X86::BI__builtin_ia32_scattersiv16sf:

1324 case X86::BI__builtin_ia32_scatterdiv8df:

1325 case X86::BI__builtin_ia32_scatterdiv16sf:

1326 case X86::BI__builtin_ia32_scattersiv8di:

1327 case X86::BI__builtin_ia32_scattersiv16si:

1328 case X86::BI__builtin_ia32_scatterdiv8di:

1329 case X86::BI__builtin_ia32_scatterdiv16si:

1330 case X86::BI__builtin_ia32_scatterdiv2df:

1331 case X86::BI__builtin_ia32_scatterdiv2di:

1332 case X86::BI__builtin_ia32_scatterdiv4df:

1333 case X86::BI__builtin_ia32_scatterdiv4di:

1334 case X86::BI__builtin_ia32_scatterdiv4sf:

1335 case X86::BI__builtin_ia32_scatterdiv4si:

1336 case X86::BI__builtin_ia32_scatterdiv8sf:

1337 case X86::BI__builtin_ia32_scatterdiv8si:

1338 case X86::BI__builtin_ia32_scattersiv2df:

1339 case X86::BI__builtin_ia32_scattersiv2di:

1340 case X86::BI__builtin_ia32_scattersiv4df:

1341 case X86::BI__builtin_ia32_scattersiv4di:

1342 case X86::BI__builtin_ia32_scattersiv4sf:

1343 case X86::BI__builtin_ia32_scattersiv4si:

1344 case X86::BI__builtin_ia32_scattersiv8sf:

1345 case X86::BI__builtin_ia32_scattersiv8si: {

1346 Intrinsic::ID IID;

1347 switch (BuiltinID) {

1348 default: llvm_unreachable("Unexpected builtin");

1349 case X86::BI__builtin_ia32_scattersiv8df:

1350 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;

1351 break;

1352 case X86::BI__builtin_ia32_scattersiv16sf:

1353 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;

1354 break;

1355 case X86::BI__builtin_ia32_scatterdiv8df:

1356 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;

1357 break;

1358 case X86::BI__builtin_ia32_scatterdiv16sf:

1359 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;

1360 break;

1361 case X86::BI__builtin_ia32_scattersiv8di:

1362 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;

1363 break;

1364 case X86::BI__builtin_ia32_scattersiv16si:

1365 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;

1366 break;

1367 case X86::BI__builtin_ia32_scatterdiv8di:

1368 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;

1369 break;

1370 case X86::BI__builtin_ia32_scatterdiv16si:

1371 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;

1372 break;

1373 case X86::BI__builtin_ia32_scatterdiv2df:

1374 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;

1375 break;

1376 case X86::BI__builtin_ia32_scatterdiv2di:

1377 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;

1378 break;

1379 case X86::BI__builtin_ia32_scatterdiv4df:

1380 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;

1381 break;

1382 case X86::BI__builtin_ia32_scatterdiv4di:

1383 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;

1384 break;

1385 case X86::BI__builtin_ia32_scatterdiv4sf:

1386 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;

1387 break;

1388 case X86::BI__builtin_ia32_scatterdiv4si:

1389 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;

1390 break;

1391 case X86::BI__builtin_ia32_scatterdiv8sf:

1392 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;

1393 break;

1394 case X86::BI__builtin_ia32_scatterdiv8si:

1395 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;

1396 break;

1397 case X86::BI__builtin_ia32_scattersiv2df:

1398 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;

1399 break;

1400 case X86::BI__builtin_ia32_scattersiv2di:

1401 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;

1402 break;

1403 case X86::BI__builtin_ia32_scattersiv4df:

1404 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;

1405 break;

1406 case X86::BI__builtin_ia32_scattersiv4di:

1407 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;

1408 break;

1409 case X86::BI__builtin_ia32_scattersiv4sf:

1410 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;

1411 break;

1412 case X86::BI__builtin_ia32_scattersiv4si:

1413 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;

1414 break;

1415 case X86::BI__builtin_ia32_scattersiv8sf:

1416 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;

1417 break;

1418 case X86::BI__builtin_ia32_scattersiv8si:

1419 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;

1420 break;

1421 }

1422

1423 unsigned MinElts = std::min(

1428 return Builder.CreateCall(Intr, Ops);

1429 }

1430

1431 case X86::BI__builtin_ia32_vextractf128_pd256:

1432 case X86::BI__builtin_ia32_vextractf128_ps256:

1433 case X86::BI__builtin_ia32_vextractf128_si256:

1434 case X86::BI__builtin_ia32_extract128i256:

1435 case X86::BI__builtin_ia32_extractf64x4_mask:

1436 case X86::BI__builtin_ia32_extractf32x4_mask:

1437 case X86::BI__builtin_ia32_extracti64x4_mask:

1438 case X86::BI__builtin_ia32_extracti32x4_mask:

1439 case X86::BI__builtin_ia32_extractf32x8_mask:

1440 case X86::BI__builtin_ia32_extracti32x8_mask:

1441 case X86::BI__builtin_ia32_extractf32x4_256_mask:

1442 case X86::BI__builtin_ia32_extracti32x4_256_mask:

1443 case X86::BI__builtin_ia32_extractf64x2_256_mask:

1444 case X86::BI__builtin_ia32_extracti64x2_256_mask:

1445 case X86::BI__builtin_ia32_extractf64x2_512_mask:

1446 case X86::BI__builtin_ia32_extracti64x2_512_mask: {

1448 unsigned NumElts = DstTy->getNumElements();

1449 unsigned SrcNumElts =

1451 unsigned SubVectors = SrcNumElts / NumElts;

1453 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");

1454 Index &= SubVectors - 1;

1455 Index *= NumElts;

1456

1457 int Indices[16];

1458 for (unsigned i = 0; i != NumElts; ++i)

1459 Indices[i] = i + Index;

1460

1461 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),

1462 "extract");

1463

1464 if (Ops.size() == 4)

1465 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);

1466

1467 return Res;

1468 }
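[Editorial sketch, not part of the original file: a subvector extract that the case above turns into a plain shufflevector. The demo_ name is mine; assumes -mavx512f.]

#include <immintrin.h>

__m256d demo_extract_high(__m512d v) {
  // Index 1, masked and scaled by NumElts, yields shuffle indices <4, 5, 6, 7>.
  return _mm512_extractf64x4_pd(v, 1);
}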

1469 case X86::BI__builtin_ia32_vinsertf128_pd256:

1470 case X86::BI__builtin_ia32_vinsertf128_ps256:

1471 case X86::BI__builtin_ia32_vinsertf128_si256:

1472 case X86::BI__builtin_ia32_insert128i256:

1473 case X86::BI__builtin_ia32_insertf64x4:

1474 case X86::BI__builtin_ia32_insertf32x4:

1475 case X86::BI__builtin_ia32_inserti64x4:

1476 case X86::BI__builtin_ia32_inserti32x4:

1477 case X86::BI__builtin_ia32_insertf32x8:

1478 case X86::BI__builtin_ia32_inserti32x8:

1479 case X86::BI__builtin_ia32_insertf32x4_256:

1480 case X86::BI__builtin_ia32_inserti32x4_256:

1481 case X86::BI__builtin_ia32_insertf64x2_256:

1482 case X86::BI__builtin_ia32_inserti64x2_256:

1483 case X86::BI__builtin_ia32_insertf64x2_512:

1484 case X86::BI__builtin_ia32_inserti64x2_512: {

1485 unsigned DstNumElts =

1487 unsigned SrcNumElts =

1489 unsigned SubVectors = DstNumElts / SrcNumElts;

1491 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");

1492 Index &= SubVectors - 1;

1493 Index *= SrcNumElts;

1494

1495 int Indices[16];

1496 for (unsigned i = 0; i != DstNumElts; ++i)

1497 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;

1498

1500 Ops[1], ArrayRef(Indices, DstNumElts), "widen");

1501

1502 for (unsigned i = 0; i != DstNumElts; ++i) {

1503 if (i >= Index && i < (Index + SrcNumElts))

1504 Indices[i] = (i - Index) + DstNumElts;

1505 else

1506 Indices[i] = i;

1507 }

1508

1509 return Builder.CreateShuffleVector(Ops[0], Op1,

1510 ArrayRef(Indices, DstNumElts), "insert");

1511 }

1512 case X86::BI__builtin_ia32_pmovqd512_mask:

1513 case X86::BI__builtin_ia32_pmovwb512_mask: {

1514 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());

1515 return EmitX86Select(*this, Ops[2], Res, Ops[1]);

1516 }

1517 case X86::BI__builtin_ia32_pmovdb512_mask:

1518 case X86::BI__builtin_ia32_pmovdw512_mask:

1519 case X86::BI__builtin_ia32_pmovqw512_mask: {

1520 if (const auto *C = dyn_cast<Constant>(Ops[2]))

1521 if (C->isAllOnesValue())

1522 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());

1523

1524 Intrinsic::ID IID;

1525 switch (BuiltinID) {

1526 default: llvm_unreachable("Unsupported intrinsic!");

1527 case X86::BI__builtin_ia32_pmovdb512_mask:

1528 IID = Intrinsic::x86_avx512_mask_pmov_db_512;

1529 break;

1530 case X86::BI__builtin_ia32_pmovdw512_mask:

1531 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;

1532 break;

1533 case X86::BI__builtin_ia32_pmovqw512_mask:

1534 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;

1535 break;

1536 }

1537

1539 return Builder.CreateCall(Intr, Ops);

1540 }

1541 case X86::BI__builtin_ia32_pblendw128:

1542 case X86::BI__builtin_ia32_blendpd:

1543 case X86::BI__builtin_ia32_blendps:

1544 case X86::BI__builtin_ia32_blendpd256:

1545 case X86::BI__builtin_ia32_blendps256:

1546 case X86::BI__builtin_ia32_pblendw256:

1547 case X86::BI__builtin_ia32_pblendd128:

1548 case X86::BI__builtin_ia32_pblendd256: {

1549 unsigned NumElts =

1552

1553 int Indices[16];

1554

1555

1556 for (unsigned i = 0; i != NumElts; ++i)

1557 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;

1558

1559 return Builder.CreateShuffleVector(Ops[0], Ops[1],

1560 ArrayRef(Indices, NumElts), "blend");

1561 }

1562 case X86::BI__builtin_ia32_pshuflw:

1563 case X86::BI__builtin_ia32_pshuflw256:

1564 case X86::BI__builtin_ia32_pshuflw512: {

1567 unsigned NumElts = Ty->getNumElements();

1568

1569

1570 Imm = (Imm & 0xff) * 0x01010101;

1571

1572 int Indices[32];

1573 for (unsigned l = 0; l != NumElts; l += 8) {

1574 for (unsigned i = 0; i != 4; ++i) {

1575 Indices[l + i] = l + (Imm & 3);

1576 Imm >>= 2;

1577 }

1578 for (unsigned i = 4; i != 8; ++i)

1579 Indices[l + i] = l + i;

1580 }

1581

1582 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),

1583 "pshuflw");

1584 }

1585 case X86::BI__builtin_ia32_pshufhw:

1586 case X86::BI__builtin_ia32_pshufhw256:

1587 case X86::BI__builtin_ia32_pshufhw512: {

1590 unsigned NumElts = Ty->getNumElements();

1591

1592

1593 Imm = (Imm & 0xff) * 0x01010101;

1594

1595 int Indices[32];

1596 for (unsigned l = 0; l != NumElts; l += 8) {

1597 for (unsigned i = 0; i != 4; ++i)

1598 Indices[l + i] = l + i;

1599 for (unsigned i = 4; i != 8; ++i) {

1600 Indices[l + i] = l + 4 + (Imm & 3);

1601 Imm >>= 2;

1602 }

1603 }

1604

1605 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),

1606 "pshufhw");

1607 }

1608 case X86::BI__builtin_ia32_pshufd:

1609 case X86::BI__builtin_ia32_pshufd256:

1610 case X86::BI__builtin_ia32_pshufd512:

1611 case X86::BI__builtin_ia32_vpermilpd:

1612 case X86::BI__builtin_ia32_vpermilps:

1613 case X86::BI__builtin_ia32_vpermilpd256:

1614 case X86::BI__builtin_ia32_vpermilps256:

1615 case X86::BI__builtin_ia32_vpermilpd512:

1616 case X86::BI__builtin_ia32_vpermilps512: {

1619 unsigned NumElts = Ty->getNumElements();

1620 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;

1621 unsigned NumLaneElts = NumElts / NumLanes;

1622

1623

1624 Imm = (Imm & 0xff) * 0x01010101;

1625

1626 int Indices[16];

1627 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {

1628 for (unsigned i = 0; i != NumLaneElts; ++i) {

1629 Indices[i + l] = (Imm % NumLaneElts) + l;

1630 Imm /= NumLaneElts;

1631 }

1632 }

1633

1634 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),

1635 "permil");

1636 }

1637 case X86::BI__builtin_ia32_shufpd:

1638 case X86::BI__builtin_ia32_shufpd256:

1639 case X86::BI__builtin_ia32_shufpd512:

1640 case X86::BI__builtin_ia32_shufps:

1641 case X86::BI__builtin_ia32_shufps256:

1642 case X86::BI__builtin_ia32_shufps512: {

1645 unsigned NumElts = Ty->getNumElements();

1646 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;

1647 unsigned NumLaneElts = NumElts / NumLanes;

1648

1649

1650 Imm = (Imm & 0xff) * 0x01010101;

1651

1652 int Indices[16];

1653 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {

1654 for (unsigned i = 0; i != NumLaneElts; ++i) {

1655 unsigned Index = Imm % NumLaneElts;

1656 Imm /= NumLaneElts;

1657 if (i >= (NumLaneElts / 2))

1658 Index += NumElts;

1659 Indices[l + i] = l + Index;

1660 }

1661 }

1662

1663 return Builder.CreateShuffleVector(Ops[0], Ops[1],

1664 ArrayRef(Indices, NumElts), "shufp");

1665 }

1666 case X86::BI__builtin_ia32_permdi256:

1667 case X86::BI__builtin_ia32_permdf256:

1668 case X86::BI__builtin_ia32_permdi512:

1669 case X86::BI__builtin_ia32_permdf512: {

1672 unsigned NumElts = Ty->getNumElements();

1673

1674

1675 int Indices[8];

1676 for (unsigned l = 0; l != NumElts; l += 4)

1677 for (unsigned i = 0; i != 4; ++i)

1678 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);

1679

1680 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),

1681 "perm");

1682 }

1683 case X86::BI__builtin_ia32_palignr128:

1684 case X86::BI__builtin_ia32_palignr256:

1685 case X86::BI__builtin_ia32_palignr512: {

1687

1688 unsigned NumElts =

1690 assert(NumElts % 16 == 0);

1691

1692

1693

1694 if (ShiftVal >= 32)

1696

1697

1698

1699 if (ShiftVal > 16) {

1700 ShiftVal -= 16;

1701 Ops[1] = Ops[0];

1702 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());

1703 }

1704

1705 int Indices[64];

1706

1707 for (unsigned l = 0; l != NumElts; l += 16) {

1708 for (unsigned i = 0; i != 16; ++i) {

1709 unsigned Idx = ShiftVal + i;

1710 if (Idx >= 16)

1711 Idx += NumElts - 16;

1712 Indices[l + i] = Idx + l;

1713 }

1714 }

1715

1716 return Builder.CreateShuffleVector(Ops[1], Ops[0],

1717 ArrayRef(Indices, NumElts), "palignr");

1718 }
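[Editorial sketch, not part of the original file: a palignr caller. The demo_ name is mine; assumes SSSE3.]

#include <immintrin.h>

__m128i demo_alignr(__m128i hi, __m128i lo) {
  // ShiftVal 4 shuffles the (lo, hi) pair with byte indices 4..19, i.e.
  // (hi:lo) >> 4 bytes within each 128-bit lane.
  return _mm_alignr_epi8(hi, lo, 4);
}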

1719 case X86::BI__builtin_ia32_alignd128:

1720 case X86::BI__builtin_ia32_alignd256:

1721 case X86::BI__builtin_ia32_alignd512:

1722 case X86::BI__builtin_ia32_alignq128:

1723 case X86::BI__builtin_ia32_alignq256:

1724 case X86::BI__builtin_ia32_alignq512: {

1725 unsigned NumElts =

1728

1729

1730 ShiftVal &= NumElts - 1;

1731

1732 int Indices[16];

1733 for (unsigned i = 0; i != NumElts; ++i)

1734 Indices[i] = i + ShiftVal;

1735

1736 return Builder.CreateShuffleVector(Ops[1], Ops[0],

1737 ArrayRef(Indices, NumElts), "valign");

1738 }

1739 case X86::BI__builtin_ia32_shuf_f32x4_256:

1740 case X86::BI__builtin_ia32_shuf_f64x2_256:

1741 case X86::BI__builtin_ia32_shuf_i32x4_256:

1742 case X86::BI__builtin_ia32_shuf_i64x2_256:

1743 case X86::BI__builtin_ia32_shuf_f32x4:

1744 case X86::BI__builtin_ia32_shuf_f64x2:

1745 case X86::BI__builtin_ia32_shuf_i32x4:

1746 case X86::BI__builtin_ia32_shuf_i64x2: {

1749 unsigned NumElts = Ty->getNumElements();

1750 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;

1751 unsigned NumLaneElts = NumElts / NumLanes;

1752

1753 int Indices[16];

1754 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {

1755 unsigned Index = (Imm % NumLanes) * NumLaneElts;

1756 Imm /= NumLanes;

1757 if (l >= (NumElts / 2))

1758 Index += NumElts;

1759 for (unsigned i = 0; i != NumLaneElts; ++i) {

1760 Indices[l + i] = Index + i;

1761 }

1762 }

1763

1764 return Builder.CreateShuffleVector(Ops[0], Ops[1],

1765 ArrayRef(Indices, NumElts), "shuf");

1766 }

1767

1768 case X86::BI__builtin_ia32_vperm2f128_pd256:

1769 case X86::BI__builtin_ia32_vperm2f128_ps256:

1770 case X86::BI__builtin_ia32_vperm2f128_si256:

1771 case X86::BI__builtin_ia32_permti256: {

1773 unsigned NumElts =

1775

1776

1777

1778

1779

1780

1781 Value *OutOps[2];

1782 int Indices[8];

1783 for (unsigned l = 0; l != 2; ++l) {

1784

1785 if (Imm & (1 << ((l * 4) + 3)))

1786 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());

1787 else if (Imm & (1 << ((l * 4) + 1)))

1788 OutOps[l] = Ops[1];

1789 else

1790 OutOps[l] = Ops[0];

1791

1792 for (unsigned i = 0; i != NumElts/2; ++i) {

1793

1794 unsigned Idx = (l * NumElts) + i;

1795

1796

1797 if (Imm & (1 << (l * 4)))

1798 Idx += NumElts/2;

1799 Indices[(l * (NumElts/2)) + i] = Idx;

1800 }

1801 }

1802

1803 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],

1804 ArrayRef(Indices, NumElts), "vperm");

1805 }

1806

1807 case X86::BI__builtin_ia32_pslldqi128_byteshift:

1808 case X86::BI__builtin_ia32_pslldqi256_byteshift:

1809 case X86::BI__builtin_ia32_pslldqi512_byteshift: {

1812

1813 unsigned NumElts = VecTy->getNumElements();

1814 Value *Zero = llvm::Constant::getNullValue(VecTy);

1815

1816

1817 if (ShiftVal >= 16)

1818 return Zero;

1819

1820 int Indices[64];

1821

1822 for (unsigned l = 0; l != NumElts; l += 16) {

1823 for (unsigned i = 0; i != 16; ++i) {

1824 unsigned Idx = NumElts + i - ShiftVal;

1825 if (Idx < NumElts)

1826 Idx -= NumElts - 16;

1827 Indices[l + i] = Idx + l;

1828 }

1829 }

1830 return Builder.CreateShuffleVector(Zero, Ops[0], ArrayRef(Indices, NumElts),

1831 "pslldq");

1832 }

1833 case X86::BI__builtin_ia32_psrldqi128_byteshift:

1834 case X86::BI__builtin_ia32_psrldqi256_byteshift:

1835 case X86::BI__builtin_ia32_psrldqi512_byteshift: {

1838

1839 unsigned NumElts = VecTy->getNumElements();

1840 Value *Zero = llvm::Constant::getNullValue(VecTy);

1841

1842

1843 if (ShiftVal >= 16)

1844 return Zero;

1845

1846 int Indices[64];

1847

1848 for (unsigned l = 0; l != NumElts; l += 16) {

1849 for (unsigned i = 0; i != 16; ++i) {

1850 unsigned Idx = i + ShiftVal;

1851 if (Idx >= 16)

1852 Idx += NumElts - 16;

1853 Indices[l + i] = Idx + l;

1854 }

1855 }

1856 return Builder.CreateShuffleVector(Ops[0], Zero, ArrayRef(Indices, NumElts),

1857 "psrldq");

1858 }

1859 case X86::BI__builtin_ia32_kshiftliqi:

1860 case X86::BI__builtin_ia32_kshiftlihi:

1861 case X86::BI__builtin_ia32_kshiftlisi:

1862 case X86::BI__builtin_ia32_kshiftlidi: {

1864 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();

1865

1866 if (ShiftVal >= NumElts)

1867 return llvm::Constant::getNullValue(Ops[0]->getType());

1868

1870

1871 int Indices[64];

1872 for (unsigned i = 0; i != NumElts; ++i)

1873 Indices[i] = NumElts + i - ShiftVal;

1874

1875 Value *Zero = llvm::Constant::getNullValue(In->getType());

1877 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");

1878 return Builder.CreateBitCast(SV, Ops[0]->getType());

1879 }

1880 case X86::BI__builtin_ia32_kshiftriqi:

1881 case X86::BI__builtin_ia32_kshiftrihi:

1882 case X86::BI__builtin_ia32_kshiftrisi:

1883 case X86::BI__builtin_ia32_kshiftridi: {

1885 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();

1886

1887 if (ShiftVal >= NumElts)

1888 return llvm::Constant::getNullValue(Ops[0]->getType());

1889

1891

1892 int Indices[64];

1893 for (unsigned i = 0; i != NumElts; ++i)

1894 Indices[i] = i + ShiftVal;

1895

1896 Value *Zero = llvm::Constant::getNullValue(In->getType());

1898 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");

1899 return Builder.CreateBitCast(SV, Ops[0]->getType());

1900 }

1901 case X86::BI__builtin_ia32_movnti:

1902 case X86::BI__builtin_ia32_movnti64:

1903 case X86::BI__builtin_ia32_movntsd:

1904 case X86::BI__builtin_ia32_movntss: {

1905 llvm::MDNode *Node = llvm::MDNode::get(

1906 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

1907

1908 Value *Ptr = Ops[0];

1909 Value *Src = Ops[1];

1910

1911

1912 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||

1913 BuiltinID == X86::BI__builtin_ia32_movntss)

1914 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");

1915

1916

1917 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);

1918 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);

1919 SI->setAlignment(llvm::Align(1));

1920 return SI;

1921 }

  // Rotate is a special case of funnel shift - the first two args are the
  // same value.
  case X86::BI__builtin_ia32_vprotbi:
  case X86::BI__builtin_ia32_vprotwi:
  case X86::BI__builtin_ia32_vprotdi:
  case X86::BI__builtin_ia32_vprotqi:
  case X86::BI__builtin_ia32_prold128:
  case X86::BI__builtin_ia32_prold256:
  case X86::BI__builtin_ia32_prold512:
  case X86::BI__builtin_ia32_prolq128:
  case X86::BI__builtin_ia32_prolq256:
  case X86::BI__builtin_ia32_prolq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
  case X86::BI__builtin_ia32_prord128:
  case X86::BI__builtin_ia32_prord256:
  case X86::BI__builtin_ia32_prord512:
  case X86::BI__builtin_ia32_prorq128:
  case X86::BI__builtin_ia32_prorq256:
  case X86::BI__builtin_ia32_prorq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectpbf_128:
  case X86::BI__builtin_ia32_selectpbf_256:
  case X86::BI__builtin_ia32_selectpbf_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);

  case X86::BI__builtin_ia32_selectsh_128:
  case X86::BI__builtin_ia32_selectsbf_128:
  case X86::BI__builtin_ia32_selectss_128:
  case X86::BI__builtin_ia32_selectsd_128: {
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
  }
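  // Note on the scalar selects above: only lane 0 participates.
  // EmitX86ScalarSelect, defined earlier in this file, tests bit 0 of the
  // mask in Ops[0] and yields A (the new scalar) when it is set, otherwise B
  // (the fallback extracted from Ops[2]); the chosen value is then written
  // back into lane 0 of Ops[1].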

  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }
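  // The three-bit CC immediate above follows the AVX-512 integer-compare
  // encoding handled by EmitX86MaskedCompare: 0 = eq, 1 = lt, 2 = le,
  // 3 = always-false, 4 = ne, 5 = nlt (ge), 6 = nle (gt), 7 = always-true,
  // with the signed/unsigned flavor chosen by the boolean argument.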

  case X86::BI__builtin_ia32_vpcomb:
  case X86::BI__builtin_ia32_vpcomw:
  case X86::BI__builtin_ia32_vpcomd:
  case X86::BI__builtin_ia32_vpcomq:
    return EmitX86vpcom(*this, Ops, true);
  case X86::BI__builtin_ia32_vpcomub:
  case X86::BI__builtin_ia32_vpcomuw:
  case X86::BI__builtin_ia32_vpcomud:
  case X86::BI__builtin_ia32_vpcomuq:
    return EmitX86vpcom(*this, Ops, false);

  case X86::BI__builtin_ia32_kortestcqi:
  case X86::BI__builtin_ia32_kortestchi:
  case X86::BI__builtin_ia32_kortestcsi:
  case X86::BI__builtin_ia32_kortestcdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_kortestzqi:
  case X86::BI__builtin_ia32_kortestzhi:
  case X86::BI__builtin_ia32_kortestzsi:
  case X86::BI__builtin_ia32_kortestzdi: {
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  }

  case X86::BI__builtin_ia32_ktestcqi:
  case X86::BI__builtin_ia32_ktestzqi:
  case X86::BI__builtin_ia32_ktestchi:
  case X86::BI__builtin_ia32_ktestzhi:
  case X86::BI__builtin_ia32_ktestcsi:
  case X86::BI__builtin_ia32_ktestzsi:
  case X86::BI__builtin_ia32_ktestcdi:
  case X86::BI__builtin_ia32_ktestzdi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_ktestcqi:
      IID = Intrinsic::x86_avx512_ktestc_b;
      break;
    case X86::BI__builtin_ia32_ktestzqi:
      IID = Intrinsic::x86_avx512_ktestz_b;
      break;
    case X86::BI__builtin_ia32_ktestchi:
      IID = Intrinsic::x86_avx512_ktestc_w;
      break;
    case X86::BI__builtin_ia32_ktestzhi:
      IID = Intrinsic::x86_avx512_ktestz_w;
      break;
    case X86::BI__builtin_ia32_ktestcsi:
      IID = Intrinsic::x86_avx512_ktestc_d;
      break;
    case X86::BI__builtin_ia32_ktestzsi:
      IID = Intrinsic::x86_avx512_ktestz_d;
      break;
    case X86::BI__builtin_ia32_ktestcdi:
      IID = Intrinsic::x86_avx512_ktestc_q;
      break;
    case X86::BI__builtin_ia32_ktestzdi:
      IID = Intrinsic::x86_avx512_ktestz_q;
      break;
    }

    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    return Builder.CreateCall(Intr, {LHS, RHS});
  }

  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_kaddqi:
      IID = Intrinsic::x86_avx512_kadd_b;
      break;
    case X86::BI__builtin_ia32_kaddhi:
      IID = Intrinsic::x86_avx512_kadd_w;
      break;
    case X86::BI__builtin_ia32_kaddsi:
      IID = Intrinsic::x86_avx512_kadd_d;
      break;
    case X86::BI__builtin_ia32_kadddi:
      IID = Intrinsic::x86_avx512_kadd_q;
      break;
    }

    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    Function *Intr = CGM.getIntrinsic(IID);
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }

  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Builder.CreateNot(Res),
                                 Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq: {
    // Bitcast to vXi1 type and then back to integer. This gets the mask
    // register type into the IR, but might be optimized out depending on
    // what's around it.
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }

  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
  case X86::BI__builtin_ia32_kunpckhi: {
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;

    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands are swapped to match the intrinsic definition.
    Value *Res =
        Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
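  // Illustrative: for kunpckhi (two 16-bit masks), the first two shuffles
  // keep the low 8 elements of each operand, and the final shuffle
  // concatenates them with RHS in the low half, so after the bitcast the
  // result is (LHS[7:0] << 8) | RHS[7:0], matching KUNPCKBW's definition of
  // dst = {src1[7:0], src2[7:0]}.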

  case X86::BI__builtin_ia32_sqrtsh_round_mask:
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
    // otherwise keep the intrinsic.
    if (CC != 4) {
      Intrinsic::ID IID;

      switch (BuiltinID) {
      default:
        llvm_unreachable("Unsupported intrinsic!");
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
        break;
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
        break;
      case X86::BI__builtin_ia32_sqrtss_round_mask:
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
        break;
      }
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    }
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Function *F;
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                           A->getType());
      A = Builder.CreateConstrainedFPCall(F, A);
    } else {
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
      A = Builder.CreateCall(F, A);
    }
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
  }
  case X86::BI__builtin_ia32_sqrtph512:
  case X86::BI__builtin_ia32_sqrtps512:
  case X86::BI__builtin_ia32_sqrtpd512: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
    // otherwise keep the intrinsic.
    if (CC != 4) {
      Intrinsic::ID IID;

      switch (BuiltinID) {
      default:
        llvm_unreachable("Unsupported intrinsic!");
      case X86::BI__builtin_ia32_sqrtph512:
        IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
        break;
      case X86::BI__builtin_ia32_sqrtps512:
        IID = Intrinsic::x86_avx512_sqrt_ps_512;
        break;
      case X86::BI__builtin_ia32_sqrtpd512:
        IID = Intrinsic::x86_avx512_sqrt_pd_512;
        break;
      }
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
    }
    if (Builder.getIsFPConstrained()) {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                     Ops[0]->getType());
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
    } else {
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
      return Builder.CreateCall(F, Ops[0]);
    }
  }

  case X86::BI__builtin_ia32_pmuludq128:
  case X86::BI__builtin_ia32_pmuludq256:
  case X86::BI__builtin_ia32_pmuludq512:
    return EmitX86Muldq(*this, false, Ops);

  case X86::BI__builtin_ia32_pmuldq128:
  case X86::BI__builtin_ia32_pmuldq256:
  case X86::BI__builtin_ia32_pmuldq512:
    return EmitX86Muldq(*this, true, Ops);

  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
    return EmitX86Ternlog(*this, false, Ops);

  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
    return EmitX86Ternlog(*this, true, Ops);

  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);

  case X86::BI__builtin_ia32_vpshrdd128:
  case X86::BI__builtin_ia32_vpshrdd256:
  case X86::BI__builtin_ia32_vpshrdd512:
  case X86::BI__builtin_ia32_vpshrdq128:
  case X86::BI__builtin_ia32_vpshrdq256:
  case X86::BI__builtin_ia32_vpshrdq512:
  case X86::BI__builtin_ia32_vpshrdw128:
  case X86::BI__builtin_ia32_vpshrdw256:
  case X86::BI__builtin_ia32_vpshrdw512:
    // Ops 0 and 1 are swapped.
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);

  case X86::BI__builtin_ia32_reduce_fadd_pd512:
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
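  // Note: llvm.vector.reduce.fadd takes a scalar accumulator (Ops[0]) plus
  // the vector (Ops[1]) and is sequentially ordered by default; the reassoc
  // flag set above is what permits a tree-shaped reduction, matching the
  // relaxed ordering these reduce builtins are documented to provide.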

  case X86::BI__builtin_ia32_reduce_fmul_pd512:
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setAllowReassoc();
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
  }
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
    Function *F =
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.getFastMathFlags().setNoNaNs();
    return Builder.CreateCall(F, {Ops[0]});
  }

  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
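  // The *_step builtins expose the instruction's carry-flag protocol: the
  // random value is stored through Ops[0] and the success flag is returned,
  // so callers retry on failure, e.g.
  //   unsigned long long r;
  //   while (!__builtin_ia32_rdrand64_step(&r))
  //     ; // hardware may transiently fail, so loop until success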

  case X86::BI__builtin_ia32_addcarryx_u32:
  case X86::BI__builtin_ia32_addcarryx_u64:
  case X86::BI__builtin_ia32_subborrow_u32:
  case X86::BI__builtin_ia32_subborrow_u64: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_addcarryx_u32:
      IID = Intrinsic::x86_addcarry_32;
      break;
    case X86::BI__builtin_ia32_addcarryx_u64:
      IID = Intrinsic::x86_addcarry_64;
      break;
    case X86::BI__builtin_ia32_subborrow_u32:
      IID = Intrinsic::x86_subborrow_32;
      break;
    case X86::BI__builtin_ia32_subborrow_u64:
      IID = Intrinsic::x86_subborrow_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
                                     { Ops[0], Ops[1], Ops[2] });
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
                                      Ops[3]);
    return Builder.CreateExtractValue(Call, 0);
  }
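  // The {i8, iN} pair returned by llvm.x86.addcarry/subborrow carries the
  // flag at index 0 and the arithmetic result at index 1, which is why the
  // store above uses index 1 and the carry-out is returned. Typical use is a
  // multi-word addition chain, e.g.
  //   unsigned char c = 0;
  //   c = _addcarryx_u64(c, a0, b0, &r0);
  //   c = _addcarryx_u64(c, a1, b1, &r1);  // c propagates the carry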

  case X86::BI__builtin_ia32_fpclassps128_mask:
  case X86::BI__builtin_ia32_fpclassps256_mask:
  case X86::BI__builtin_ia32_fpclassps512_mask:
  case X86::BI__builtin_ia32_vfpclassbf16128_mask:
  case X86::BI__builtin_ia32_vfpclassbf16256_mask:
  case X86::BI__builtin_ia32_vfpclassbf16512_mask:
  case X86::BI__builtin_ia32_fpclassph128_mask:
  case X86::BI__builtin_ia32_fpclassph256_mask:
  case X86::BI__builtin_ia32_fpclassph512_mask:
  case X86::BI__builtin_ia32_fpclasspd128_mask:
  case X86::BI__builtin_ia32_fpclasspd256_mask:
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);

    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vfpclassbf16128_mask:
      ID = Intrinsic::x86_avx10_fpclass_bf16_128;
      break;
    case X86::BI__builtin_ia32_vfpclassbf16256_mask:
      ID = Intrinsic::x86_avx10_fpclass_bf16_256;
      break;
    case X86::BI__builtin_ia32_vfpclassbf16512_mask:
      ID = Intrinsic::x86_avx10_fpclass_bf16_512;
      break;
    case X86::BI__builtin_ia32_fpclassph128_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
      break;
    case X86::BI__builtin_ia32_fpclassph256_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
      break;
    case X86::BI__builtin_ia32_fpclassph512_mask:
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
      break;
    case X86::BI__builtin_ia32_fpclassps128_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
      break;
    case X86::BI__builtin_ia32_fpclassps256_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
      break;
    case X86::BI__builtin_ia32_fpclassps512_mask:
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
      break;
    case X86::BI__builtin_ia32_fpclasspd128_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
      break;
    case X86::BI__builtin_ia32_fpclasspd256_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
      break;
    case X86::BI__builtin_ia32_fpclasspd512_mask:
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
      break;
    }

    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
  }

  case X86::BI__builtin_ia32_vp2intersect_q_512:
  case X86::BI__builtin_ia32_vp2intersect_q_256:
  case X86::BI__builtin_ia32_vp2intersect_q_128:
  case X86::BI__builtin_ia32_vp2intersect_d_512:
  case X86::BI__builtin_ia32_vp2intersect_d_256:
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Intrinsic::ID ID;

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vp2intersect_q_512:
      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_256:
      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_q_128:
      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_512:
      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_256:
      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
      break;
    case X86::BI__builtin_ia32_vp2intersect_d_128:
      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
    Value *Result = Builder.CreateExtractValue(Call, 0);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    Builder.CreateDefaultAlignedStore(Result, Ops[2]);

    Result = Builder.CreateExtractValue(Call, 1);
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
  }

  case X86::BI__builtin_ia32_vpmultishiftqb128:
  case X86::BI__builtin_ia32_vpmultishiftqb256:
  case X86::BI__builtin_ia32_vpmultishiftqb512: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpmultishiftqb128:
      ID = Intrinsic::x86_avx512_pmultishift_qb_128;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb256:
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb512:
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);

    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
  }

  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, false);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT, true);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE, true);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO, false);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE, false);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE, true);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT, true);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD, false);
  case X86::BI__builtin_ia32_cmpph128_mask:
  case X86::BI__builtin_ia32_cmpph256_mask:
  case X86::BI__builtin_ia32_cmpph512_mask:
  case X86::BI__builtin_ia32_cmpps128_mask:
  case X86::BI__builtin_ia32_cmpps256_mask:
  case X86::BI__builtin_ia32_cmpps512_mask:
  case X86::BI__builtin_ia32_cmppd128_mask:
  case X86::BI__builtin_ia32_cmppd256_mask:
  case X86::BI__builtin_ia32_cmppd512_mask:
  case X86::BI__builtin_ia32_vcmpbf16512_mask:
  case X86::BI__builtin_ia32_vcmpbf16256_mask:
  case X86::BI__builtin_ia32_vcmpbf16128_mask:
    IsMaskFCmp = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {

    // The third argument is the comparison condition; an immediate in the
    // range [0, 31].
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;

    // Map the condition to an IR fcmp predicate. Quiet and signaling forms
    // (e.g. _CMP_GT_OS vs. _CMP_GT_OQ) share a predicate; whether the
    // comparison is signaling is tracked separately in IsSignaling.
    FCmpInst::Predicate Pred;
    bool IsSignaling;
    // Predicates for 16-31 repeat the 0-15 predicates; only the signaling
    // behaviour is inverted, which is handled after the switch.

    switch (CC & 0xf) {
    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
    default: llvm_unreachable("Unhandled CC");
    }
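    // For reference, the low four bits map to the Intel _CMP_* predicates:
    // 0x00 EQ_OQ, 0x01 LT_OS, 0x02 LE_OS, 0x03 UNORD_Q, 0x04 NEQ_UQ,
    // 0x05 NLT_US, 0x06 NLE_US, 0x07 ORD_Q, 0x08 EQ_UQ, 0x09 NGE_US,
    // 0x0a NGT_US, 0x0b FALSE_OQ, 0x0c NEQ_OQ, 0x0d GE_OS, 0x0e GT_OS,
    // 0x0f TRUE_UQ; values 0x10-0x1f repeat them with the opposite
    // quiet/signaling behaviour, handled just below.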

    // Conditions 16-31 invert the signaling behaviour of 0-15.
    if (CC & 0x10)
      IsSignaling = !IsSignaling;

    // If the predicate is true or false and we're using constrained
    // intrinsics, there is no fcmp we can lower to, so keep the legacy
    // X86-specific intrinsic; likewise, masked builtins under constrained FP
    // go through their masked intrinsic.
    if (Builder.getIsFPConstrained() &&
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
         IsMaskFCmp)) {

      Intrinsic::ID IID;
      switch (BuiltinID) {
      default: llvm_unreachable("Unexpected builtin");
      case X86::BI__builtin_ia32_cmpps:
        IID = Intrinsic::x86_sse_cmp_ps;
        break;
      case X86::BI__builtin_ia32_cmpps256:
        IID = Intrinsic::x86_avx_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd:
        IID = Intrinsic::x86_sse2_cmp_pd;
        break;
      case X86::BI__builtin_ia32_cmppd256:
        IID = Intrinsic::x86_avx_cmp_pd_256;
        break;
      case X86::BI__builtin_ia32_cmpph128_mask:
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
        break;
      case X86::BI__builtin_ia32_cmpph256_mask:
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
        break;
      case X86::BI__builtin_ia32_cmpph512_mask:
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
        break;
      case X86::BI__builtin_ia32_cmpps512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
        break;
      case X86::BI__builtin_ia32_cmppd512_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
        break;
      case X86::BI__builtin_ia32_cmpps128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
        break;
      case X86::BI__builtin_ia32_cmpps256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
        break;
      case X86::BI__builtin_ia32_cmppd128_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
        break;
      case X86::BI__builtin_ia32_cmppd256_mask:
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
        break;
      }

      Function *Intr = CGM.getIntrinsic(IID);
      if (IsMaskFCmp) {
        unsigned NumElts =
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
        Value *Cmp = Builder.CreateCall(Intr, Ops);
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
      }

      return Builder.CreateCall(Intr, Ops);
    }

    // Builtins without the _mask suffix return a vector of integers of the
    // same width as the input vectors.
    if (IsMaskFCmp) {
      // Masked builtins: emit the fcmp (signaling when requested) and merge
      // the resulting i1 vector with the incoming mask in Ops[3]; the SAE
      // operand of the 512-bit forms is not needed on this path.
      unsigned NumElts =
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
      Value *Cmp;
      if (IsSignaling)
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
      else
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
    }

    return getVectorFCmpIR(Pred, IsSignaling);
  }

  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);

  case X86::BI__builtin_ia32_vcvtph2ps_mask:
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
  }

  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
    Ops[2] = getMaskVecValue(
        *this, Ops[2],
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
  }
  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
    Intrinsic::ID IID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
      break;
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
      break;
    }
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
  }

  case X86::BI__cpuid:
  case X86::BI__cpuidex: {
    Value *FuncId = EmitScalarExpr(E->getArg(1));
    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
                           ? EmitScalarExpr(E->getArg(2))
                           : llvm::ConstantInt::get(Int32Ty, 0);

    llvm::StructType *CpuidRetTy =
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);

    StringRef Asm, Constraints;
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
      Asm = "cpuid";
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
    } else {
      // x86-64 uses %rbx as the base register, so preserve it.
      Asm = "xchgq %rbx, ${1:q}\n"
            "cpuid\n"
            "xchgq %rbx, ${1:q}";
      Constraints = "={ax},=r,={cx},={dx},0,2";
    }

    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
                                               /*hasSideEffects=*/false);
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
    Value *Store = nullptr;
    for (unsigned i = 0; i < 4; i++) {
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
    }

    // Return the last store instruction to signal that we have emitted the
    // intrinsic.
    return Store;
  }
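  // Reading the x86-64 constraint string above: "={ax},=r,={cx},={dx},0,2"
  // declares four outputs (eax, an arbitrary scratch register standing in
  // for ebx, ecx, edx) and two inputs tied to outputs 0 and 2, i.e. the leaf
  // in eax and the subleaf in ecx. %rbx is swapped out around the cpuid
  // because it may be reserved (for example as the PIC base register), so it
  // cannot simply be listed as a clobber.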

  case X86::BI__emul:
  case X86::BI__emulu: {
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }

  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;

    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }
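  // In effect, both operands are widened to i128, multiplied once, and the
  // high half is recovered with a 64-bit shift. For _umul128(a, b, &hi) the
  // lowering computes p = zext(a) * zext(b), stores trunc(p >> 64) through
  // hi, and returns trunc(p); the backend can fold this whole pattern into a
  // single widening MUL.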

  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI__shiftleft128:
  case X86::BI__shiftright128: {
    llvm::Function *F = CGM.getIntrinsic(
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
        Int64Ty);
    // Flip low/high ops and zero-extend amount to matching type.
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
    std::swap(Ops[0], Ops[1]);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return Builder.CreateCall(F, Ops);
  }
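  // Illustrative: __shiftleft128(Low, High, Amt) returns the high 64 bits of
  // the 128-bit value (High:Low) << Amt, which for Amt in [1, 63] equals
  // (High << Amt) | (Low >> (64 - Amt)), i.e. exactly fshl(High, Low, Amt);
  // hence the operand swap above.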

  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }

  case X86::BI_AddressOfReturnAddress: {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate a
    // "rep stosb" instruction, but the memset it creates will not be
    // optimized away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
  }

  case X86::BI__ud2:
    // llvm.trap makes a ud2a instruction on x86.
    return EmitTrapCall(Intrinsic::trap);
  case X86::BI__int2c: {
    // This syscall signals a driver assertion failure in x86 NT kernels.
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
    llvm::InlineAsm *IA =
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
        llvm::Attribute::NoReturn);
    llvm::CallInst *CI = Builder.CreateCall(IA);
    CI->setAttributes(NoReturnAttr);
    return CI;
  }

  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(
        Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(
        Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }

  case X86::BI__builtin_ia32_encodekey128_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});

    for (int i = 0; i < 3; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }

    return Builder.CreateExtractValue(Call, 0);
  }
  case X86::BI__builtin_ia32_encodekey256_u32: {
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;

    Value *Call =
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});

    for (int i = 0; i < 4; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
    }

    return Builder.CreateExtractValue(Call, 0);
  }

  case X86::BI__builtin_ia32_aesenc128kl_u8:
  case X86::BI__builtin_ia32_aesdec128kl_u8:
  case X86::BI__builtin_ia32_aesenc256kl_u8:
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unexpected builtin");
    case X86::BI__builtin_ia32_aesenc128kl_u8:
      IID = Intrinsic::x86_aesenc128kl;
      BlockName = "aesenc128kl";
      break;
    case X86::BI__builtin_ia32_aesdec128kl_u8:
      IID = Intrinsic::x86_aesdec128kl;
      BlockName = "aesdec128kl";
      break;
    case X86::BI__builtin_ia32_aesenc256kl_u8:
      IID = Intrinsic::x86_aesenc256kl;
      BlockName = "aesenc256kl";
      break;
    case X86::BI__builtin_ia32_aesdec256kl_u8:
      IID = Intrinsic::x86_aesdec256kl;
      BlockName = "aesdec256kl";
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});

    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);

    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Value *Out = Builder.CreateExtractValue(Call, 1);
    Builder.CreateCondBr(Succ, NoError, Error);

    Builder.SetInsertPoint(NoError);
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
    Builder.CreateBr(End);

    Builder.SetInsertPoint(Error);
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
    Builder.CreateBr(End);

    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }

  case X86::BI__builtin_ia32_aesencwide128kl_u8:
  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
  case X86::BI__builtin_ia32_aesencwide256kl_u8:
  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
    Intrinsic::ID IID;
    StringRef BlockName;
    switch (BuiltinID) {
    case X86::BI__builtin_ia32_aesencwide128kl_u8:
      IID = Intrinsic::x86_aesencwide128kl;
      BlockName = "aesencwide128kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide128kl_u8:
      IID = Intrinsic::x86_aesdecwide128kl;
      BlockName = "aesdecwide128kl";
      break;
    case X86::BI__builtin_ia32_aesencwide256kl_u8:
      IID = Intrinsic::x86_aesencwide256kl;
      BlockName = "aesencwide256kl";
      break;
    case X86::BI__builtin_ia32_aesdecwide256kl_u8:
      IID = Intrinsic::x86_aesdecwide256kl;
      BlockName = "aesdecwide256kl";
      break;
    }

    llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
    Value *InOps[9];
    InOps[0] = Ops[2];
    for (int i = 0; i != 8; ++i) {
      Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
      InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);

    BasicBlock *NoError =
        createBasicBlock(BlockName + "_no_error", this->CurFn);
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);

    Value *Ret = Builder.CreateExtractValue(Call, 0);
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
    Builder.CreateCondBr(Succ, NoError, Error);

    Builder.SetInsertPoint(NoError);
    for (int i = 0; i != 8; ++i) {
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
      Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Extract, Ptr, Align(16));
    }
    Builder.CreateBr(End);

    Builder.SetInsertPoint(Error);
    for (int i = 0; i != 8; ++i) {
      Value *Out = Builder.CreateExtractValue(Call, i + 1);
      Constant *Zero = llvm::Constant::getNullValue(Out->getType());
      Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
      Builder.CreateAlignedStore(Zero, Ptr, Align(16));
    }
    Builder.CreateBr(End);

    Builder.SetInsertPoint(End);
    return Builder.CreateExtractValue(Call, 0);
  }

3080 }

3081 case X86::BI__builtin_ia32_vfcmaddcph512_mask:

3082 IsConjFMA = true;

3083 [[fallthrough]];

3084 case X86::BI__builtin_ia32_vfmaddcph512_mask: {

3085 Intrinsic::ID IID = IsConjFMA

3086 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512

3087 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;

3090 }

3091 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:

3092 IsConjFMA = true;

3093 [[fallthrough]];

3094 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {

3095 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh

3096 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;

3100 }

3101 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:

3102 IsConjFMA = true;

3103 [[fallthrough]];

3104 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {

3105 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh

3106 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;

3108 static constexpr int Mask[] = {0, 5, 6, 7};

3109 return Builder.CreateShuffleVector(Call, Ops[2], Mask);

3110 }
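  // In the mask3 variant above, the shuffle mask {0, 5, 6, 7} keeps lane 0
  // of the intrinsic result (the computed complex value) and lanes 1-3 of
  // Ops[2] (indices 5-7 of the concatenated pair), so only the scalar
  // complex element is replaced while the upper elements of the third
  // operand pass through unchanged.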

  case X86::BI__builtin_ia32_prefetchi:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
        {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
         llvm::ConstantInt::get(Int32Ty, 0)});
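    // Note: __builtin_ia32_prefetchi maps onto the generic llvm.prefetch
    // with rw = 0 (read), locality taken from Ops[1], and cache type 0
    // (instruction rather than data cache), which is what distinguishes the
    // prefetchit0/prefetchit1 forms from the data-cache prefetch builtins.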

  }
}