clang: lib/CodeGen/TargetBuiltins/X86.cpp Source File
26 switch (BuiltinID) {
27 default:
28 return std::nullopt;
29 case clang::X86::BI_BitScanForward:
30 case clang::X86::BI_BitScanForward64:
31 return MSVCIntrin::_BitScanForward;
32 case clang::X86::BI_BitScanReverse:
33 case clang::X86::BI_BitScanReverse64:
34 return MSVCIntrin::_BitScanReverse;
35 case clang::X86::BI_InterlockedAnd64:
36 return MSVCIntrin::_InterlockedAnd;
37 case clang::X86::BI_InterlockedCompareExchange128:
38 return MSVCIntrin::_InterlockedCompareExchange128;
39 case clang::X86::BI_InterlockedExchange64:
40 return MSVCIntrin::_InterlockedExchange;
41 case clang::X86::BI_InterlockedExchangeAdd64:
42 return MSVCIntrin::_InterlockedExchangeAdd;
43 case clang::X86::BI_InterlockedExchangeSub64:
44 return MSVCIntrin::_InterlockedExchangeSub;
45 case clang::X86::BI_InterlockedOr64:
46 return MSVCIntrin::_InterlockedOr;
47 case clang::X86::BI_InterlockedXor64:
48 return MSVCIntrin::_InterlockedXor;
49 case clang::X86::BI_InterlockedDecrement64:
50 return MSVCIntrin::_InterlockedDecrement;
51 case clang::X86::BI_InterlockedIncrement64:
52 return MSVCIntrin::_InterlockedIncrement;
53 }
54 llvm_unreachable("must return from switch");
55}
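// [Editor's note] A sketch of how this table is consumed, assuming the usual
// EmitMSVCBuiltinExpr-style dispatch (the caller name is an assumption, not
// shown in this excerpt): the 64-bit-suffixed MSVC builtins deliberately map
// onto the width-generic MSVCIntrin enumerators, e.g. _InterlockedAnd64 reuses
// the _InterlockedAnd lowering, which picks the operand width from the call's
// argument types.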
58 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
59                               unsigned NumElts) {
60
61   auto *MaskTy = llvm::FixedVectorType::get(
62       CGF.Builder.getInt1Ty(),
63       cast<IntegerType>(Mask->getType())->getBitWidth());
64 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
65
66   // If we have less than 8 elements, then the starting mask was an i8 and
67   // we need to extract down to the right number of elements.
68 if (NumElts < 8) {
69 int Indices[4];
70 for (unsigned i = 0; i != NumElts; ++i)
71 Indices[i] = i;
72 MaskVec = CGF.Builder.CreateShuffleVector(
73 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
74 }
75 return MaskVec;
76}
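// [Editor's note] getMaskVecValue turns an iN AVX-512 mask into an <N x i1>
// vector. As an illustrative example: a mask used with a 4-element operation
// arrives as an i8 (the narrowest scalar mask type), is bitcast to <8 x i1>,
// and is then shuffled down to <4 x i1> with indices {0, 1, 2, 3}.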
78 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
79                                  Align Alignment) {
80 Value *Ptr = Ops[0];
81
82   Value *MaskVec = getMaskVecValue(
83       CGF, Ops[2],
84       cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
85
86 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
87}
89 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
90                                 Align Alignment) {
91 llvm::Type *Ty = Ops[1]->getType();
92 Value *Ptr = Ops[0];
93
94   Value *MaskVec = getMaskVecValue(
95       CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
96
97 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
98}
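// [Editor's note] Rough shape of the IR the two helpers above produce for a
// 4 x float masked load/store (illustrative values, not from a real test; the
// alignment argument comes from the caller):
//   %r = call <4 x float> @llvm.masked.load.v4f32.p0(
//            ptr %p, i32 1, <4 x i1> %m, <4 x float> %passthru)
//   call void @llvm.masked.store.v4f32.p0(
//            <4 x float> %v, ptr %p, i32 1, <4 x i1> %m)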
101 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
102   auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
103   Value *Ptr = Ops[0];
104
105   Value *MaskVec = getMaskVecValue(
106       CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
107
108 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
109 ResultTy);
110 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
111}
114 static Value *EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
115                                     bool IsCompress) {
116   auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
117
118   Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
119
120 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
121 : Intrinsic::x86_avx512_mask_expand;
122   Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
123   return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
124}
126 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
127                                    ArrayRef<Value *> Ops) {
128   auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
129   Value *Ptr = Ops[0];
130
131   Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
132
133 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
134 ResultTy);
135 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
136}
138 static Value *EmitX86MaskLogic(CodeGenFunction &CGF,
139                                Instruction::BinaryOps Opc, ArrayRef<Value *> Ops,
140                                bool InvertLHS = false) {
141 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
142   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
143   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
144
145 if (InvertLHS)
146 LHS = CGF.Builder.CreateNot(LHS);
147
148 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
149 Ops[0]->getType());
150}
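// [Editor's note] EmitX86MaskLogic implements the k-register logic builtins
// (kand/kor/kxor/...): both iN masks are converted to <N x i1>, combined with
// the requested binary operator, and bitcast back to iN. InvertLHS covers the
// "andn"-style forms, e.g. kandn computes (~LHS) & RHS.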
152 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
153                                  Value *Amt, bool IsRight) {
154 llvm::Type *Ty = Op0->getType();
155
156   // Amount may be scalar immediate, in which case create a splat vector.
157   // Funnel shift amounts are treated as modulo and types are all power-of-2
158   // so we only care about the lowest log2 bits anyway.
159 if (Amt->getType() != Ty) {
160     unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
161     Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
162 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
163 }
164
165 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
166   Function *F = CGF.CGM.getIntrinsic(IID, Ty);
167   return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
168}
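// [Editor's note] The rotate/funnel-shift builtins map directly onto the
// generic llvm.fshl/llvm.fshr intrinsics. A scalar immediate shift amount is
// first cast (zero-extended or truncated) to the element type and splatted,
// which is safe because funnel-shift amounts are taken modulo the element
// width.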
170 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
171                            bool IsSigned) {
172 Value *Op0 = Ops[0];
173 Value *Op1 = Ops[1];
174 llvm::Type *Ty = Op0->getType();
175   uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
176
177 CmpInst::Predicate Pred;
178 switch (Imm) {
179 case 0x0:
180 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
181 break;
182 case 0x1:
183 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
184 break;
185 case 0x2:
186 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
187 break;
188 case 0x3:
189 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
190 break;
191 case 0x4:
192 Pred = ICmpInst::ICMP_EQ;
193 break;
194 case 0x5:
195 Pred = ICmpInst::ICMP_NE;
196 break;
197 case 0x6:
198 return llvm::Constant::getNullValue(Ty);
199 case 0x7:
200 return llvm::Constant::getAllOnesValue(Ty);
201 default:
202 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
203 }
204
205 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
206 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
207 return Res;
208}
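// [Editor's note] XOP's vpcom/vpcomu encode the predicate in an immediate:
// 0x0..0x3 select lt/le/gt/ge (signed or unsigned per IsSigned), 0x4/0x5 are
// eq/ne, and 0x6/0x7 are the degenerate "always false"/"always true" forms,
// which fold to constant all-zeros/all-ones vectors without any compare.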
210 static Value *EmitX86Select(CodeGenFunction &CGF,
211                             Value *Mask, Value *Op0, Value *Op1) {
212
213   // If the mask is all ones just return first argument.
214   if (const auto *C = dyn_cast<Constant>(Mask))
215 if (C->isAllOnesValue())
216 return Op0;
217
218   Mask = getMaskVecValue(
219       CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
220
221 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
222}
224 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
225                                   Value *Mask, Value *Op0, Value *Op1) {
226   // If the mask is all ones just return first argument.
227   if (const auto *C = dyn_cast<Constant>(Mask))
228 if (C->isAllOnesValue())
229 return Op0;
230
231 auto *MaskTy = llvm::FixedVectorType::get(
232 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
233 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
234 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
235 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
236}
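// [Editor's note] EmitX86ScalarSelect guards only element 0: the iN mask is
// viewed as <N x i1> and bit 0 alone picks between Op0 and Op1. This is the
// masking used by the scalar (ss/sd/sh) AVX-512 builtins, whose upper
// elements are handled separately by the caller.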
238 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
239                                          unsigned NumElts, Value *MaskIn) {
240 if (MaskIn) {
241     const auto *C = dyn_cast<Constant>(MaskIn);
242     if (!C || !C->isAllOnesValue())
243       Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
244   }
245
246 if (NumElts < 8) {
247 int Indices[8];
248 for (unsigned i = 0; i != NumElts; ++i)
249 Indices[i] = i;
250 for (unsigned i = NumElts; i != 8; ++i)
251 Indices[i] = i % NumElts + NumElts;
252 Cmp = CGF.Builder.CreateShuffleVector(
253 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
254 }
255
256   return CGF.Builder.CreateBitCast(Cmp,
257                                    IntegerType::get(CGF.getLLVMContext(),
258                                                     std::max(NumElts, 8U)));
259}
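// [Editor's note] Compare results narrower than 8 lanes still have to come
// back as an i8 mask, so the <N x i1> compare is padded by a shuffle whose
// tail indices land in the all-zero second operand (for N == 4 the indices
// are {0,1,2,3,4,5,6,7}, where 4..7 read zeros) before the bitcast to an
// integer of width max(NumElts, 8).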
261 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
262                                    bool Signed, ArrayRef<Value *> Ops) {
263   assert((Ops.size() == 2 || Ops.size() == 4) &&
264 "Unexpected number of arguments");
265   unsigned NumElts =
266       cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
267   Value *Cmp;
268
269 if (CC == 3) {
270 Cmp = Constant::getNullValue(
271 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
272 } else if (CC == 7) {
273 Cmp = Constant::getAllOnesValue(
274 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
275 } else {
276 ICmpInst::Predicate Pred;
277 switch (CC) {
278 default: llvm_unreachable("Unknown condition code");
279 case 0: Pred = ICmpInst::ICMP_EQ; break;
280 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
281 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
282 case 4: Pred = ICmpInst::ICMP_NE; break;
283 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
284 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
285 }
286 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
287 }
288
289 Value *MaskIn = nullptr;
290 if (Ops.size() == 4)
291 MaskIn = Ops[3];
292
293   return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
294 }
296 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
297   Value *Zero = Constant::getNullValue(In->getType());
298   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
299 }
300
301 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
302                                     ArrayRef<Value *> Ops, bool IsSigned) {
303   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
304   llvm::Type *Ty = Ops[1]->getType();
305
306   Value *Res;
307   if (Rnd != 4) {
308 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
309 : Intrinsic::x86_avx512_uitofp_round;
310 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
311 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
312 } else {
313     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
314     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
315 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
316 }
317
318   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
319 }
322 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
323                              ArrayRef<Value *> Ops, unsigned BuiltinID,
324                              bool IsAddSub) {
325
326 bool Subtract = false;
327 Intrinsic::ID IID = Intrinsic::not_intrinsic;
328 switch (BuiltinID) {
329 default: break;
330 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
331 Subtract = true;
332 [[fallthrough]];
333 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
334 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
335 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
336 IID = Intrinsic::x86_avx512fp16_vfmadd_ph_512;
337 break;
338 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
339 Subtract = true;
340 [[fallthrough]];
341 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
342 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
343 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
344 IID = Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
345 break;
346 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
347 Subtract = true;
348 [[fallthrough]];
349 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
350 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
351 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
352 IID = Intrinsic::x86_avx512_vfmadd_ps_512; break;
353 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
354 Subtract = true;
355 [[fallthrough]];
356 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
357 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
358 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
359 IID = Intrinsic::x86_avx512_vfmadd_pd_512; break;
360 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
361 Subtract = true;
362 [[fallthrough]];
363 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
364 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
365 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
366 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
367 break;
368 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
369 Subtract = true;
370 [[fallthrough]];
371 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
372 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
373 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
374 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
375 break;
376 }
377
378 Value *A = Ops[0];
379 Value *B = Ops[1];
380   Value *C = Ops[2];
381
382 if (Subtract)
383     C = CGF.Builder.CreateFNeg(C);
384
385   Value *Res;
386
387   // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
388   if (IID != Intrinsic::not_intrinsic &&
389       (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
390        IsAddSub)) {
391     Function *Intr = CGF.CGM.getIntrinsic(IID);
392     Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
393 } else {
394 llvm::Type *Ty = A->getType();
395 Function *FMA;
396 if (CGF.Builder.getIsFPConstrained()) {
397       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
398       FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
399 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
400 } else {
401       FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
402       Res = CGF.Builder.CreateCall(FMA, {A, B, C});
403 }
404 }
405
406   // Handle any required masking.
407 Value *MaskFalseVal = nullptr;
408 switch (BuiltinID) {
409 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
410 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
411 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
412 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
413 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
414 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
415 MaskFalseVal = Ops[0];
416 break;
417 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
418 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
419 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
420 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
422 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
423 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
424 break;
425 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
426 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
427 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
428 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
429 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
430 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
431 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
432 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
433 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
434 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
435 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
436 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
437 MaskFalseVal = Ops[2];
438 break;
439 }
440
441 if (MaskFalseVal)
442 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
443
444 return Res;
445}
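// [Editor's note] Summary of the dispatch above: a 512-bit FMA builtin with a
// non-default rounding operand (or any addsub form) has to keep the target
// intrinsic, since plain llvm.fma can carry neither a rounding mode nor the
// alternating add/subtract semantics; otherwise it lowers to llvm.fma (or its
// constrained variant). The _mask/_maskz/_mask3 suffix then only changes what
// is blended in for masked-off lanes: the first source, zero, or the
// accumulator, via EmitX86Select.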
447 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
448                                 MutableArrayRef<Value *> Ops, Value *Upper,
449                                 bool ZeroMask = false, unsigned PTIdx = 0,
450                                 bool NegAcc = false) {
451 unsigned Rnd = 4;
452 if (Ops.size() > 4)
453     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
454
455 if (NegAcc)
456 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
457
458 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
459 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
460 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
461   Value *Res;
462   if (Rnd != 4) {
463 Intrinsic::ID IID;
464
465 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
466 case 16:
467 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
468 break;
469 case 32:
470 IID = Intrinsic::x86_avx512_vfmadd_f32;
471 break;
472 case 64:
473 IID = Intrinsic::x86_avx512_vfmadd_f64;
474 break;
475 default:
476 llvm_unreachable("Unexpected size");
477 }
478     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
479                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
480 } else if (CGF.Builder.getIsFPConstrained()) {
481     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
482     Function *FMA = CGF.CGM.getIntrinsic(
483         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
484 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
485 } else {
486     Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
487     Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
488 }
489
490 if (Ops.size() > 3) {
491 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
492 : Ops[PTIdx];
493
494     // If we negated the accumulator and it is the PassThru value then we
495     // need to bypass the negate. Conveniently Upper should be the same thing
496     // in this case.
497 if (NegAcc && PTIdx == 2)
498 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
499
500     Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
501   }
502 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
503}
505 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
506                            ArrayRef<Value *> Ops) {
507   llvm::Type *Ty = Ops[0]->getType();
508   // Arguments have a vXi32 type so cast to vXi64.
509 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
510 Ty->getPrimitiveSizeInBits() / 64);
511 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
512 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
513
514 if (IsSigned) {
515     // Shift left then arithmetic shift right.
516 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
517 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
518 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
519 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
520 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
521 } else {
522     // Clear the upper bits.
523 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
524 LHS = CGF.Builder.CreateAnd(LHS, Mask);
525 RHS = CGF.Builder.CreateAnd(RHS, Mask);
526 }
527
528 return CGF.Builder.CreateMul(LHS, RHS);
529}
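// [Editor's note] pmuldq/pmuludq view the vXi32 inputs as vXi64 and multiply
// only the low 32 bits of each 64-bit lane. A worked example for the unsigned
// form: lane = (a & 0xffffffff) * (b & 0xffffffff), computed modulo 2^64; the
// signed form gets its sign extension from the shl-32/ashr-32 pair above.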
534 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
535                              ArrayRef<Value *> Ops) {
536   llvm::Type *Ty = Ops[0]->getType();
537
538 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
539 unsigned EltWidth = Ty->getScalarSizeInBits();
540 Intrinsic::ID IID;
541 if (VecWidth == 128 && EltWidth == 32)
542 IID = Intrinsic::x86_avx512_pternlog_d_128;
543 else if (VecWidth == 256 && EltWidth == 32)
544 IID = Intrinsic::x86_avx512_pternlog_d_256;
545 else if (VecWidth == 512 && EltWidth == 32)
546 IID = Intrinsic::x86_avx512_pternlog_d_512;
547 else if (VecWidth == 128 && EltWidth == 64)
548 IID = Intrinsic::x86_avx512_pternlog_q_128;
549 else if (VecWidth == 256 && EltWidth == 64)
550 IID = Intrinsic::x86_avx512_pternlog_q_256;
551 else if (VecWidth == 512 && EltWidth == 64)
552 IID = Intrinsic::x86_avx512_pternlog_q_512;
553 else
554 llvm_unreachable("Unexpected intrinsic");
555
556   Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
557                                           Ops.drop_back());
558 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
559 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
560}
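// [Editor's note] vpternlog takes an 8-bit immediate that is a truth table
// selecting an arbitrary three-input boolean function per bit. Only the
// unmasked target intrinsic is emitted here; masking is layered on top with
// EmitX86Select, the masked-off lanes taking zero (_maskz) or the first
// operand (_mask).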
562 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
563                               llvm::Type *DstTy) {
564   unsigned NumberOfElements =
565       cast<llvm::FixedVectorType>(DstTy)->getNumElements();
566   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
567 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
568}
577 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
578                                        ArrayRef<Value *> Ops,
579                                        llvm::Type *DstTy) {
580 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
581 "Unknown cvtph2ps intrinsic");
582
583   // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
584   if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
585     Function *F =
586 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
587 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
588 }
589
590   unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
591   Value *Src = Ops[0];
592
593   // Extract the subvector.
594   if (NumDstElts !=
595       cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
596 assert(NumDstElts == 4 && "Unexpected vector size");
597 Src = CGF.Builder.CreateShuffleVector(Src, {0, 1, 2, 3});
598 }
599
600   // Bitcast from vXi16 to vXf16.
601 auto *HalfTy = llvm::FixedVectorType::get(
602 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
603 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
604
605   // Perform the fp-extension.
606 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
607
608 if (Ops.size() >= 3)
609     Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
610   return Res;
611}
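// [Editor's note] cvtph2ps without SAE is expressible as a plain fpext from
// <N x half>, which is why the bitcast-from-vXi16 + fpext path exists; only
// the 512-bit form with a non-default rounding/SAE operand has to stay on the
// x86_avx512_mask_vcvtph2ps_512 target intrinsic.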
739 if (BuiltinID == Builtin::BI__builtin_cpu_is)
740 return EmitX86CpuIs(E);
741 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
742 return EmitX86CpuSupports(E);
743 if (BuiltinID == Builtin::BI__builtin_cpu_init)
744 return EmitX86CpuInit();
745
746
747
750
751   SmallVector<Value*, 4> Ops;
752 bool IsMaskFCmp = false;
753 bool IsConjFMA = false;
754
755
756 unsigned ICEArguments = 0;
757   ASTContext::GetBuiltinTypeError Error;
758   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
759   assert(Error == ASTContext::GE_None && "Should not codegen an error");
760
761 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
762     Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
763   }
764
765
766
767
768
769
770
771 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
772 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
773 llvm::Function *F = CGM.getIntrinsic(ID);
774 return Builder.CreateCall(F, Ops);
775 };
776
777
778
779
780
781
782 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
783 bool IsSignaling) {
784     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
785     Value *Cmp;
786     if (IsSignaling)
787 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
788 else
789 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
790     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
791     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
792 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
793 return Builder.CreateBitCast(Sext, FPVecTy);
794 };
795
796 switch (BuiltinID) {
797 default: return nullptr;
798   case X86::BI_mm_prefetch: {
799     Value *Address = Ops[0];
800     ConstantInt *C = cast<ConstantInt>(Ops[1]);
801     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
802     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
803     Value *Data = ConstantInt::get(Int32Ty, 1);
804     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
805     return Builder.CreateCall(F, {Address, RW, Locality, Data});
806   }
807 case X86::BI_m_prefetch:
808 case X86::BI_m_prefetchw: {
809     Value *Address = Ops[0];
810
811     Value *RW =
812         ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
813 Value *Locality = ConstantInt::get(Int32Ty, 0x3);
814     Value *Data = ConstantInt::get(Int32Ty, 1);
815     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
816     return Builder.CreateCall(F, {Address, RW, Locality, Data});
817   }
818 case X86::BI_mm_clflush: {
819 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
820 Ops[0]);
821 }
822 case X86::BI_mm_lfence: {
823 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
824 }
825 case X86::BI_mm_mfence: {
826 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
827 }
828 case X86::BI_mm_sfence: {
829 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
830 }
831 case X86::BI_mm_pause: {
832 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
833 }
834 case X86::BI__rdtsc: {
835 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
836 }
837 case X86::BI__builtin_ia32_rdtscp: {
838 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
839 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
840 Ops[0]);
841 return Builder.CreateExtractValue(Call, 0);
842 }
843 case X86::BI__builtin_ia32_lzcnt_u16:
844 case X86::BI__builtin_ia32_lzcnt_u32:
845 case X86::BI__builtin_ia32_lzcnt_u64: {
846     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
847     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
848 }
849 case X86::BI__builtin_ia32_tzcnt_u16:
850 case X86::BI__builtin_ia32_tzcnt_u32:
851 case X86::BI__builtin_ia32_tzcnt_u64: {
852     Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
853     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
854 }
855 case X86::BI__builtin_ia32_undef128:
856 case X86::BI__builtin_ia32_undef256:
857 case X86::BI__builtin_ia32_undef512:
858     // The x86 definition of "undef" is not the same as the LLVM definition
859     // (PR32176). We leave optimizing away an unnecessary zero constant to
860     // the IR optimizer and backend.
861     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
862     // value, we should use that here instead of a zero.
863     return llvm::Constant::getNullValue(ConvertType(E->getType()));
864 case X86::BI__builtin_ia32_vec_ext_v4hi:
865 case X86::BI__builtin_ia32_vec_ext_v16qi:
866 case X86::BI__builtin_ia32_vec_ext_v8hi:
867 case X86::BI__builtin_ia32_vec_ext_v4si:
868 case X86::BI__builtin_ia32_vec_ext_v4sf:
869 case X86::BI__builtin_ia32_vec_ext_v2di:
870 case X86::BI__builtin_ia32_vec_ext_v32qi:
871 case X86::BI__builtin_ia32_vec_ext_v16hi:
872 case X86::BI__builtin_ia32_vec_ext_v8si:
873 case X86::BI__builtin_ia32_vec_ext_v4di: {
874     unsigned NumElts =
875         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
876     uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
877 Index &= NumElts - 1;
878
879
880 return Builder.CreateExtractElement(Ops[0], Index);
881 }
882 case X86::BI__builtin_ia32_vec_set_v4hi:
883 case X86::BI__builtin_ia32_vec_set_v16qi:
884 case X86::BI__builtin_ia32_vec_set_v8hi:
885 case X86::BI__builtin_ia32_vec_set_v4si:
886 case X86::BI__builtin_ia32_vec_set_v2di:
887 case X86::BI__builtin_ia32_vec_set_v32qi:
888 case X86::BI__builtin_ia32_vec_set_v16hi:
889 case X86::BI__builtin_ia32_vec_set_v8si:
890 case X86::BI__builtin_ia32_vec_set_v4di: {
891     unsigned NumElts =
892         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
893     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
894 Index &= NumElts - 1;
895
896
897 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
898 }
899 case X86::BI_mm_setcsr:
900 case X86::BI__builtin_ia32_ldmxcsr: {
902 Builder.CreateStore(Ops[0], Tmp);
903 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
905 }
906 case X86::BI_mm_getcsr:
907 case X86::BI__builtin_ia32_stmxcsr: {
909 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
911 return Builder.CreateLoad(Tmp, "stmxcsr");
912 }
913 case X86::BI__builtin_ia32_xsave:
914 case X86::BI__builtin_ia32_xsave64:
915 case X86::BI__builtin_ia32_xrstor:
916 case X86::BI__builtin_ia32_xrstor64:
917 case X86::BI__builtin_ia32_xsaveopt:
918 case X86::BI__builtin_ia32_xsaveopt64:
919 case X86::BI__builtin_ia32_xrstors:
920 case X86::BI__builtin_ia32_xrstors64:
921 case X86::BI__builtin_ia32_xsavec:
922 case X86::BI__builtin_ia32_xsavec64:
923 case X86::BI__builtin_ia32_xsaves:
924 case X86::BI__builtin_ia32_xsaves64:
925 case X86::BI__builtin_ia32_xsetbv:
926 case X86::BI_xsetbv: {
927 Intrinsic::ID ID;
928#define INTRINSIC_X86_XSAVE_ID(NAME) \
929 case X86::BI__builtin_ia32_##NAME: \
930 ID = Intrinsic::x86_##NAME; \
931 break
932 switch (BuiltinID) {
933 default: llvm_unreachable("Unsupported intrinsic!");
947 case X86::BI_xsetbv:
948 ID = Intrinsic::x86_xsetbv;
949 break;
950 }
951#undef INTRINSIC_X86_XSAVE_ID
952     Value *Mhi = Builder.CreateTrunc(
953         Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
954     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
955     Ops[1] = Mhi;
956 Ops.push_back(Mlo);
957 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
958 }
959 case X86::BI__builtin_ia32_xgetbv:
960 case X86::BI_xgetbv:
961 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
962 case X86::BI__builtin_ia32_storedqudi128_mask:
963 case X86::BI__builtin_ia32_storedqusi128_mask:
964 case X86::BI__builtin_ia32_storedquhi128_mask:
965 case X86::BI__builtin_ia32_storedquqi128_mask:
966 case X86::BI__builtin_ia32_storeupd128_mask:
967 case X86::BI__builtin_ia32_storeups128_mask:
968 case X86::BI__builtin_ia32_storedqudi256_mask:
969 case X86::BI__builtin_ia32_storedqusi256_mask:
970 case X86::BI__builtin_ia32_storedquhi256_mask:
971 case X86::BI__builtin_ia32_storedquqi256_mask:
972 case X86::BI__builtin_ia32_storeupd256_mask:
973 case X86::BI__builtin_ia32_storeups256_mask:
974 case X86::BI__builtin_ia32_storedqudi512_mask:
975 case X86::BI__builtin_ia32_storedqusi512_mask:
976 case X86::BI__builtin_ia32_storedquhi512_mask:
977 case X86::BI__builtin_ia32_storedquqi512_mask:
978 case X86::BI__builtin_ia32_storeupd512_mask:
979 case X86::BI__builtin_ia32_storeups512_mask:
980     return EmitX86MaskedStore(*this, Ops, Align(1));
981
982 case X86::BI__builtin_ia32_storesbf16128_mask:
983 case X86::BI__builtin_ia32_storesh128_mask:
984 case X86::BI__builtin_ia32_storess128_mask:
985 case X86::BI__builtin_ia32_storesd128_mask:
986     return EmitX86MaskedStore(*this, Ops, Align(1));
987
988 case X86::BI__builtin_ia32_cvtmask2b128:
989 case X86::BI__builtin_ia32_cvtmask2b256:
990 case X86::BI__builtin_ia32_cvtmask2b512:
991 case X86::BI__builtin_ia32_cvtmask2w128:
992 case X86::BI__builtin_ia32_cvtmask2w256:
993 case X86::BI__builtin_ia32_cvtmask2w512:
994 case X86::BI__builtin_ia32_cvtmask2d128:
995 case X86::BI__builtin_ia32_cvtmask2d256:
996 case X86::BI__builtin_ia32_cvtmask2d512:
997 case X86::BI__builtin_ia32_cvtmask2q128:
998 case X86::BI__builtin_ia32_cvtmask2q256:
999 case X86::BI__builtin_ia32_cvtmask2q512:
1000     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
1001
1002 case X86::BI__builtin_ia32_cvtb2mask128:
1003 case X86::BI__builtin_ia32_cvtb2mask256:
1004 case X86::BI__builtin_ia32_cvtb2mask512:
1005 case X86::BI__builtin_ia32_cvtw2mask128:
1006 case X86::BI__builtin_ia32_cvtw2mask256:
1007 case X86::BI__builtin_ia32_cvtw2mask512:
1008 case X86::BI__builtin_ia32_cvtd2mask128:
1009 case X86::BI__builtin_ia32_cvtd2mask256:
1010 case X86::BI__builtin_ia32_cvtd2mask512:
1011 case X86::BI__builtin_ia32_cvtq2mask128:
1012 case X86::BI__builtin_ia32_cvtq2mask256:
1013 case X86::BI__builtin_ia32_cvtq2mask512:
1014     return EmitX86ConvertToMask(*this, Ops[0]);
1015
1016 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1017 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1018 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
1019 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
1020 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
1021 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
1022     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
1023   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1024 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1025 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
1026 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
1027 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
1028 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
1029     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
1030
1031 case X86::BI__builtin_ia32_vfmaddsh3_mask:
1032 case X86::BI__builtin_ia32_vfmaddss3_mask:
1033 case X86::BI__builtin_ia32_vfmaddsd3_mask:
1034     return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
1035   case X86::BI__builtin_ia32_vfmaddsh3_maskz:
1036 case X86::BI__builtin_ia32_vfmaddss3_maskz:
1037 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
1038 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], true);
1039 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
1040 case X86::BI__builtin_ia32_vfmaddss3_mask3:
1041 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
1042 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], false, 2);
1043 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
1044 case X86::BI__builtin_ia32_vfmsubss3_mask3:
1045 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
1046 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], false, 2,
1047 true);
1048 case X86::BI__builtin_ia32_vfmaddph512_mask:
1049 case X86::BI__builtin_ia32_vfmaddph512_maskz:
1050 case X86::BI__builtin_ia32_vfmaddph512_mask3:
1051 case X86::BI__builtin_ia32_vfmaddps512_mask:
1052 case X86::BI__builtin_ia32_vfmaddps512_maskz:
1053 case X86::BI__builtin_ia32_vfmaddps512_mask3:
1054 case X86::BI__builtin_ia32_vfmsubps512_mask3:
1055 case X86::BI__builtin_ia32_vfmaddpd512_mask:
1056 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1057 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1058 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1059 case X86::BI__builtin_ia32_vfmsubph512_mask3:
1060 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, false);
1061 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
1062 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
1063 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
1064 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
1065 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1066 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1067 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
1068 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
1069 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
1070 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1071 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1072 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
1073 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, true);
1074
1075 case X86::BI__builtin_ia32_movdqa32store128_mask:
1076 case X86::BI__builtin_ia32_movdqa64store128_mask:
1077 case X86::BI__builtin_ia32_storeaps128_mask:
1078 case X86::BI__builtin_ia32_storeapd128_mask:
1079 case X86::BI__builtin_ia32_movdqa32store256_mask:
1080 case X86::BI__builtin_ia32_movdqa64store256_mask:
1081 case X86::BI__builtin_ia32_storeaps256_mask:
1082 case X86::BI__builtin_ia32_storeapd256_mask:
1083 case X86::BI__builtin_ia32_movdqa32store512_mask:
1084 case X86::BI__builtin_ia32_movdqa64store512_mask:
1085 case X86::BI__builtin_ia32_storeaps512_mask:
1086 case X86::BI__builtin_ia32_storeapd512_mask:
1087     return EmitX86MaskedStore(
1088         *this, Ops,
1089         getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1090
1091 case X86::BI__builtin_ia32_loadups128_mask:
1092 case X86::BI__builtin_ia32_loadups256_mask:
1093 case X86::BI__builtin_ia32_loadups512_mask:
1094 case X86::BI__builtin_ia32_loadupd128_mask:
1095 case X86::BI__builtin_ia32_loadupd256_mask:
1096 case X86::BI__builtin_ia32_loadupd512_mask:
1097 case X86::BI__builtin_ia32_loaddquqi128_mask:
1098 case X86::BI__builtin_ia32_loaddquqi256_mask:
1099 case X86::BI__builtin_ia32_loaddquqi512_mask:
1100 case X86::BI__builtin_ia32_loaddquhi128_mask:
1101 case X86::BI__builtin_ia32_loaddquhi256_mask:
1102 case X86::BI__builtin_ia32_loaddquhi512_mask:
1103 case X86::BI__builtin_ia32_loaddqusi128_mask:
1104 case X86::BI__builtin_ia32_loaddqusi256_mask:
1105 case X86::BI__builtin_ia32_loaddqusi512_mask:
1106 case X86::BI__builtin_ia32_loaddqudi128_mask:
1107 case X86::BI__builtin_ia32_loaddqudi256_mask:
1108 case X86::BI__builtin_ia32_loaddqudi512_mask:
1109     return EmitX86MaskedLoad(*this, Ops, Align(1));
1110
1111 case X86::BI__builtin_ia32_loadsbf16128_mask:
1112 case X86::BI__builtin_ia32_loadsh128_mask:
1113 case X86::BI__builtin_ia32_loadss128_mask:
1114 case X86::BI__builtin_ia32_loadsd128_mask:
1115     return EmitX86MaskedLoad(*this, Ops, Align(1));
1116
1117 case X86::BI__builtin_ia32_loadaps128_mask:
1118 case X86::BI__builtin_ia32_loadaps256_mask:
1119 case X86::BI__builtin_ia32_loadaps512_mask:
1120 case X86::BI__builtin_ia32_loadapd128_mask:
1121 case X86::BI__builtin_ia32_loadapd256_mask:
1122 case X86::BI__builtin_ia32_loadapd512_mask:
1123 case X86::BI__builtin_ia32_movdqa32load128_mask:
1124 case X86::BI__builtin_ia32_movdqa32load256_mask:
1125 case X86::BI__builtin_ia32_movdqa32load512_mask:
1126 case X86::BI__builtin_ia32_movdqa64load128_mask:
1127 case X86::BI__builtin_ia32_movdqa64load256_mask:
1128 case X86::BI__builtin_ia32_movdqa64load512_mask:
1129     return EmitX86MaskedLoad(
1130         *this, Ops,
1131         getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1132
1133 case X86::BI__builtin_ia32_expandloaddf128_mask:
1134 case X86::BI__builtin_ia32_expandloaddf256_mask:
1135 case X86::BI__builtin_ia32_expandloaddf512_mask:
1136 case X86::BI__builtin_ia32_expandloadsf128_mask:
1137 case X86::BI__builtin_ia32_expandloadsf256_mask:
1138 case X86::BI__builtin_ia32_expandloadsf512_mask:
1139 case X86::BI__builtin_ia32_expandloaddi128_mask:
1140 case X86::BI__builtin_ia32_expandloaddi256_mask:
1141 case X86::BI__builtin_ia32_expandloaddi512_mask:
1142 case X86::BI__builtin_ia32_expandloadsi128_mask:
1143 case X86::BI__builtin_ia32_expandloadsi256_mask:
1144 case X86::BI__builtin_ia32_expandloadsi512_mask:
1145 case X86::BI__builtin_ia32_expandloadhi128_mask:
1146 case X86::BI__builtin_ia32_expandloadhi256_mask:
1147 case X86::BI__builtin_ia32_expandloadhi512_mask:
1148 case X86::BI__builtin_ia32_expandloadqi128_mask:
1149 case X86::BI__builtin_ia32_expandloadqi256_mask:
1150 case X86::BI__builtin_ia32_expandloadqi512_mask:
1151     return EmitX86ExpandLoad(*this, Ops);
1152
1153 case X86::BI__builtin_ia32_compressstoredf128_mask:
1154 case X86::BI__builtin_ia32_compressstoredf256_mask:
1155 case X86::BI__builtin_ia32_compressstoredf512_mask:
1156 case X86::BI__builtin_ia32_compressstoresf128_mask:
1157 case X86::BI__builtin_ia32_compressstoresf256_mask:
1158 case X86::BI__builtin_ia32_compressstoresf512_mask:
1159 case X86::BI__builtin_ia32_compressstoredi128_mask:
1160 case X86::BI__builtin_ia32_compressstoredi256_mask:
1161 case X86::BI__builtin_ia32_compressstoredi512_mask:
1162 case X86::BI__builtin_ia32_compressstoresi128_mask:
1163 case X86::BI__builtin_ia32_compressstoresi256_mask:
1164 case X86::BI__builtin_ia32_compressstoresi512_mask:
1165 case X86::BI__builtin_ia32_compressstorehi128_mask:
1166 case X86::BI__builtin_ia32_compressstorehi256_mask:
1167 case X86::BI__builtin_ia32_compressstorehi512_mask:
1168 case X86::BI__builtin_ia32_compressstoreqi128_mask:
1169 case X86::BI__builtin_ia32_compressstoreqi256_mask:
1170 case X86::BI__builtin_ia32_compressstoreqi512_mask:
1171     return EmitX86CompressStore(*this, Ops);
1172
1173 case X86::BI__builtin_ia32_expanddf128_mask:
1174 case X86::BI__builtin_ia32_expanddf256_mask:
1175 case X86::BI__builtin_ia32_expanddf512_mask:
1176 case X86::BI__builtin_ia32_expandsf128_mask:
1177 case X86::BI__builtin_ia32_expandsf256_mask:
1178 case X86::BI__builtin_ia32_expandsf512_mask:
1179 case X86::BI__builtin_ia32_expanddi128_mask:
1180 case X86::BI__builtin_ia32_expanddi256_mask:
1181 case X86::BI__builtin_ia32_expanddi512_mask:
1182 case X86::BI__builtin_ia32_expandsi128_mask:
1183 case X86::BI__builtin_ia32_expandsi256_mask:
1184 case X86::BI__builtin_ia32_expandsi512_mask:
1185 case X86::BI__builtin_ia32_expandhi128_mask:
1186 case X86::BI__builtin_ia32_expandhi256_mask:
1187 case X86::BI__builtin_ia32_expandhi512_mask:
1188 case X86::BI__builtin_ia32_expandqi128_mask:
1189 case X86::BI__builtin_ia32_expandqi256_mask:
1190 case X86::BI__builtin_ia32_expandqi512_mask:
1191     return EmitX86CompressExpand(*this, Ops, /*IsCompress*/ false);
1192
1193 case X86::BI__builtin_ia32_compressdf128_mask:
1194 case X86::BI__builtin_ia32_compressdf256_mask:
1195 case X86::BI__builtin_ia32_compressdf512_mask:
1196 case X86::BI__builtin_ia32_compresssf128_mask:
1197 case X86::BI__builtin_ia32_compresssf256_mask:
1198 case X86::BI__builtin_ia32_compresssf512_mask:
1199 case X86::BI__builtin_ia32_compressdi128_mask:
1200 case X86::BI__builtin_ia32_compressdi256_mask:
1201 case X86::BI__builtin_ia32_compressdi512_mask:
1202 case X86::BI__builtin_ia32_compresssi128_mask:
1203 case X86::BI__builtin_ia32_compresssi256_mask:
1204 case X86::BI__builtin_ia32_compresssi512_mask:
1205 case X86::BI__builtin_ia32_compresshi128_mask:
1206 case X86::BI__builtin_ia32_compresshi256_mask:
1207 case X86::BI__builtin_ia32_compresshi512_mask:
1208 case X86::BI__builtin_ia32_compressqi128_mask:
1209 case X86::BI__builtin_ia32_compressqi256_mask:
1210 case X86::BI__builtin_ia32_compressqi512_mask:
1211     return EmitX86CompressExpand(*this, Ops, /*IsCompress*/ true);
1212
1213 case X86::BI__builtin_ia32_gather3div2df:
1214 case X86::BI__builtin_ia32_gather3div2di:
1215 case X86::BI__builtin_ia32_gather3div4df:
1216 case X86::BI__builtin_ia32_gather3div4di:
1217 case X86::BI__builtin_ia32_gather3div4sf:
1218 case X86::BI__builtin_ia32_gather3div4si:
1219 case X86::BI__builtin_ia32_gather3div8sf:
1220 case X86::BI__builtin_ia32_gather3div8si:
1221 case X86::BI__builtin_ia32_gather3siv2df:
1222 case X86::BI__builtin_ia32_gather3siv2di:
1223 case X86::BI__builtin_ia32_gather3siv4df:
1224 case X86::BI__builtin_ia32_gather3siv4di:
1225 case X86::BI__builtin_ia32_gather3siv4sf:
1226 case X86::BI__builtin_ia32_gather3siv4si:
1227 case X86::BI__builtin_ia32_gather3siv8sf:
1228 case X86::BI__builtin_ia32_gather3siv8si:
1229 case X86::BI__builtin_ia32_gathersiv8df:
1230 case X86::BI__builtin_ia32_gathersiv16sf:
1231 case X86::BI__builtin_ia32_gatherdiv8df:
1232 case X86::BI__builtin_ia32_gatherdiv16sf:
1233 case X86::BI__builtin_ia32_gathersiv8di:
1234 case X86::BI__builtin_ia32_gathersiv16si:
1235 case X86::BI__builtin_ia32_gatherdiv8di:
1236 case X86::BI__builtin_ia32_gatherdiv16si: {
1237 Intrinsic::ID IID;
1238 switch (BuiltinID) {
1239 default: llvm_unreachable("Unexpected builtin");
1240 case X86::BI__builtin_ia32_gather3div2df:
1241 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
1242 break;
1243 case X86::BI__builtin_ia32_gather3div2di:
1244 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
1245 break;
1246 case X86::BI__builtin_ia32_gather3div4df:
1247 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
1248 break;
1249 case X86::BI__builtin_ia32_gather3div4di:
1250 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
1251 break;
1252 case X86::BI__builtin_ia32_gather3div4sf:
1253 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
1254 break;
1255 case X86::BI__builtin_ia32_gather3div4si:
1256 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
1257 break;
1258 case X86::BI__builtin_ia32_gather3div8sf:
1259 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
1260 break;
1261 case X86::BI__builtin_ia32_gather3div8si:
1262 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
1263 break;
1264 case X86::BI__builtin_ia32_gather3siv2df:
1265 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
1266 break;
1267 case X86::BI__builtin_ia32_gather3siv2di:
1268 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
1269 break;
1270 case X86::BI__builtin_ia32_gather3siv4df:
1271 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
1272 break;
1273 case X86::BI__builtin_ia32_gather3siv4di:
1274 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
1275 break;
1276 case X86::BI__builtin_ia32_gather3siv4sf:
1277 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
1278 break;
1279 case X86::BI__builtin_ia32_gather3siv4si:
1280 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
1281 break;
1282 case X86::BI__builtin_ia32_gather3siv8sf:
1283 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
1284 break;
1285 case X86::BI__builtin_ia32_gather3siv8si:
1286 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
1287 break;
1288 case X86::BI__builtin_ia32_gathersiv8df:
1289 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
1290 break;
1291 case X86::BI__builtin_ia32_gathersiv16sf:
1292 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
1293 break;
1294 case X86::BI__builtin_ia32_gatherdiv8df:
1295 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
1296 break;
1297 case X86::BI__builtin_ia32_gatherdiv16sf:
1298 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
1299 break;
1300 case X86::BI__builtin_ia32_gathersiv8di:
1301 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
1302 break;
1303 case X86::BI__builtin_ia32_gathersiv16si:
1304 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
1305 break;
1306 case X86::BI__builtin_ia32_gatherdiv8di:
1307 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
1308 break;
1309 case X86::BI__builtin_ia32_gatherdiv16si:
1310 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
1311 break;
1312 }
1313
1314     unsigned MinElts = std::min(
1315         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
1316         cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
1317     Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
1318     Function *Intr = CGM.getIntrinsic(IID);
1319     return Builder.CreateCall(Intr, Ops);
1320 }
1321
1322 case X86::BI__builtin_ia32_scattersiv8df:
1323 case X86::BI__builtin_ia32_scattersiv16sf:
1324 case X86::BI__builtin_ia32_scatterdiv8df:
1325 case X86::BI__builtin_ia32_scatterdiv16sf:
1326 case X86::BI__builtin_ia32_scattersiv8di:
1327 case X86::BI__builtin_ia32_scattersiv16si:
1328 case X86::BI__builtin_ia32_scatterdiv8di:
1329 case X86::BI__builtin_ia32_scatterdiv16si:
1330 case X86::BI__builtin_ia32_scatterdiv2df:
1331 case X86::BI__builtin_ia32_scatterdiv2di:
1332 case X86::BI__builtin_ia32_scatterdiv4df:
1333 case X86::BI__builtin_ia32_scatterdiv4di:
1334 case X86::BI__builtin_ia32_scatterdiv4sf:
1335 case X86::BI__builtin_ia32_scatterdiv4si:
1336 case X86::BI__builtin_ia32_scatterdiv8sf:
1337 case X86::BI__builtin_ia32_scatterdiv8si:
1338 case X86::BI__builtin_ia32_scattersiv2df:
1339 case X86::BI__builtin_ia32_scattersiv2di:
1340 case X86::BI__builtin_ia32_scattersiv4df:
1341 case X86::BI__builtin_ia32_scattersiv4di:
1342 case X86::BI__builtin_ia32_scattersiv4sf:
1343 case X86::BI__builtin_ia32_scattersiv4si:
1344 case X86::BI__builtin_ia32_scattersiv8sf:
1345 case X86::BI__builtin_ia32_scattersiv8si: {
1346 Intrinsic::ID IID;
1347 switch (BuiltinID) {
1348 default: llvm_unreachable("Unexpected builtin");
1349 case X86::BI__builtin_ia32_scattersiv8df:
1350 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
1351 break;
1352 case X86::BI__builtin_ia32_scattersiv16sf:
1353 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
1354 break;
1355 case X86::BI__builtin_ia32_scatterdiv8df:
1356 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
1357 break;
1358 case X86::BI__builtin_ia32_scatterdiv16sf:
1359 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
1360 break;
1361 case X86::BI__builtin_ia32_scattersiv8di:
1362 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
1363 break;
1364 case X86::BI__builtin_ia32_scattersiv16si:
1365 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
1366 break;
1367 case X86::BI__builtin_ia32_scatterdiv8di:
1368 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
1369 break;
1370 case X86::BI__builtin_ia32_scatterdiv16si:
1371 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
1372 break;
1373 case X86::BI__builtin_ia32_scatterdiv2df:
1374 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
1375 break;
1376 case X86::BI__builtin_ia32_scatterdiv2di:
1377 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
1378 break;
1379 case X86::BI__builtin_ia32_scatterdiv4df:
1380 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
1381 break;
1382 case X86::BI__builtin_ia32_scatterdiv4di:
1383 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
1384 break;
1385 case X86::BI__builtin_ia32_scatterdiv4sf:
1386 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
1387 break;
1388 case X86::BI__builtin_ia32_scatterdiv4si:
1389 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
1390 break;
1391 case X86::BI__builtin_ia32_scatterdiv8sf:
1392 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
1393 break;
1394 case X86::BI__builtin_ia32_scatterdiv8si:
1395 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
1396 break;
1397 case X86::BI__builtin_ia32_scattersiv2df:
1398 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
1399 break;
1400 case X86::BI__builtin_ia32_scattersiv2di:
1401 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
1402 break;
1403 case X86::BI__builtin_ia32_scattersiv4df:
1404 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
1405 break;
1406 case X86::BI__builtin_ia32_scattersiv4di:
1407 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
1408 break;
1409 case X86::BI__builtin_ia32_scattersiv4sf:
1410 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
1411 break;
1412 case X86::BI__builtin_ia32_scattersiv4si:
1413 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
1414 break;
1415 case X86::BI__builtin_ia32_scattersiv8sf:
1416 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
1417 break;
1418 case X86::BI__builtin_ia32_scattersiv8si:
1419 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
1420 break;
1421 }
1422
1423     unsigned MinElts = std::min(
1424         cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
1425         cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
1426     Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
1427     Function *Intr = CGM.getIntrinsic(IID);
1428     return Builder.CreateCall(Intr, Ops);
1429 }
1430
1431 case X86::BI__builtin_ia32_vextractf128_pd256:
1432 case X86::BI__builtin_ia32_vextractf128_ps256:
1433 case X86::BI__builtin_ia32_vextractf128_si256:
1434 case X86::BI__builtin_ia32_extract128i256:
1435 case X86::BI__builtin_ia32_extractf64x4_mask:
1436 case X86::BI__builtin_ia32_extractf32x4_mask:
1437 case X86::BI__builtin_ia32_extracti64x4_mask:
1438 case X86::BI__builtin_ia32_extracti32x4_mask:
1439 case X86::BI__builtin_ia32_extractf32x8_mask:
1440 case X86::BI__builtin_ia32_extracti32x8_mask:
1441 case X86::BI__builtin_ia32_extractf32x4_256_mask:
1442 case X86::BI__builtin_ia32_extracti32x4_256_mask:
1443 case X86::BI__builtin_ia32_extractf64x2_256_mask:
1444 case X86::BI__builtin_ia32_extracti64x2_256_mask:
1445 case X86::BI__builtin_ia32_extractf64x2_512_mask:
1446 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
1447     auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
1448     unsigned NumElts = DstTy->getNumElements();
1449     unsigned SrcNumElts =
1450         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1451 unsigned SubVectors = SrcNumElts / NumElts;
1452     unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1453     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1454 Index &= SubVectors - 1;
1455 Index *= NumElts;
1456
1457 int Indices[16];
1458 for (unsigned i = 0; i != NumElts; ++i)
1459 Indices[i] = i + Index;
1460
1461 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1462 "extract");
1463
1464 if (Ops.size() == 4)
1465 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
1466
1467 return Res;
1468 }
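  // [Editor's note] The subvector extracts lower to plain shufflevectors;
  // e.g. (illustrative) extracting the upper 256-bit half of an 8 x double
  // vector uses indices {4, 5, 6, 7}. The Index is masked and scaled above so
  // an out-of-range immediate wraps instead of indexing past the source.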
1469 case X86::BI__builtin_ia32_vinsertf128_pd256:
1470 case X86::BI__builtin_ia32_vinsertf128_ps256:
1471 case X86::BI__builtin_ia32_vinsertf128_si256:
1472 case X86::BI__builtin_ia32_insert128i256:
1473 case X86::BI__builtin_ia32_insertf64x4:
1474 case X86::BI__builtin_ia32_insertf32x4:
1475 case X86::BI__builtin_ia32_inserti64x4:
1476 case X86::BI__builtin_ia32_inserti32x4:
1477 case X86::BI__builtin_ia32_insertf32x8:
1478 case X86::BI__builtin_ia32_inserti32x8:
1479 case X86::BI__builtin_ia32_insertf32x4_256:
1480 case X86::BI__builtin_ia32_inserti32x4_256:
1481 case X86::BI__builtin_ia32_insertf64x2_256:
1482 case X86::BI__builtin_ia32_inserti64x2_256:
1483 case X86::BI__builtin_ia32_insertf64x2_512:
1484 case X86::BI__builtin_ia32_inserti64x2_512: {
1485     unsigned DstNumElts =
1486         cast<llvm::FixedVectorType>(ConvertType(E->getType()))->getNumElements();
1487     unsigned SrcNumElts =
1488         cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
1489     unsigned SubVectors = DstNumElts / SrcNumElts;
1490     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1491     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1492 Index &= SubVectors - 1;
1493 Index *= SrcNumElts;
1494
1495 int Indices[16];
1496 for (unsigned i = 0; i != DstNumElts; ++i)
1497 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
1498
1499     Value *Op1 = Builder.CreateShuffleVector(
1500         Ops[1], ArrayRef(Indices, DstNumElts), "widen");
1501
1502 for (unsigned i = 0; i != DstNumElts; ++i) {
1503 if (i >= Index && i < (Index + SrcNumElts))
1504 Indices[i] = (i - Index) + DstNumElts;
1505 else
1506 Indices[i] = i;
1507 }
1508
1509 return Builder.CreateShuffleVector(Ops[0], Op1,
1510 ArrayRef(Indices, DstNumElts), "insert");
1511 }
1512 case X86::BI__builtin_ia32_pmovqd512_mask:
1513 case X86::BI__builtin_ia32_pmovwb512_mask: {
1514     Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1515     return EmitX86Select(*this, Ops[2], Res, Ops[1]);
1516 }
1517 case X86::BI__builtin_ia32_pmovdb512_mask:
1518 case X86::BI__builtin_ia32_pmovdw512_mask:
1519 case X86::BI__builtin_ia32_pmovqw512_mask: {
1520     if (const auto *C = dyn_cast<Constant>(Ops[2]))
1521 if (C->isAllOnesValue())
1522 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1523
1524 Intrinsic::ID IID;
1525 switch (BuiltinID) {
1526 default: llvm_unreachable("Unsupported intrinsic!");
1527 case X86::BI__builtin_ia32_pmovdb512_mask:
1528 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
1529 break;
1530 case X86::BI__builtin_ia32_pmovdw512_mask:
1531 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
1532 break;
1533 case X86::BI__builtin_ia32_pmovqw512_mask:
1534 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
1535 break;
1536 }
1537
1538     Function *Intr = CGM.getIntrinsic(IID);
1539     return Builder.CreateCall(Intr, Ops);
1540 }
1541 case X86::BI__builtin_ia32_pblendw128:
1542 case X86::BI__builtin_ia32_blendpd:
1543 case X86::BI__builtin_ia32_blendps:
1544 case X86::BI__builtin_ia32_blendpd256:
1545 case X86::BI__builtin_ia32_blendps256:
1546 case X86::BI__builtin_ia32_pblendw256:
1547 case X86::BI__builtin_ia32_pblendd128:
1548 case X86::BI__builtin_ia32_pblendd256: {
1549     unsigned NumElts =
1550         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1551     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1552
1553 int Indices[16];
1554     // If there are more than 8 elements, the immediate is used twice so make
1555     // sure we handle that.
1556 for (unsigned i = 0; i != NumElts; ++i)
1557 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
1558
1559 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1560 ArrayRef(Indices, NumElts), "blend");
1561 }
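  // [Editor's note] Immediate blends also become shuffles: bit i of the
  // immediate (taken modulo 8, per the comment above) picks lane i from the
  // second source instead of the first; e.g. blendpd with imm 0b10 on a
  // 2-element vector yields indices {0, 3}.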
1562 case X86::BI__builtin_ia32_pshuflw:
1563 case X86::BI__builtin_ia32_pshuflw256:
1564 case X86::BI__builtin_ia32_pshuflw512: {
1565     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1566     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1567     unsigned NumElts = Ty->getNumElements();
1568
1569     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1570 Imm = (Imm & 0xff) * 0x01010101;
1571
1572 int Indices[32];
1573 for (unsigned l = 0; l != NumElts; l += 8) {
1574 for (unsigned i = 0; i != 4; ++i) {
1575 Indices[l + i] = l + (Imm & 3);
1576 Imm >>= 2;
1577 }
1578 for (unsigned i = 4; i != 8; ++i)
1579 Indices[l + i] = l + i;
1580 }
1581
1582 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1583 "pshuflw");
1584 }
1585 case X86::BI__builtin_ia32_pshufhw:
1586 case X86::BI__builtin_ia32_pshufhw256:
1587 case X86::BI__builtin_ia32_pshufhw512: {
1588     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1589     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1590     unsigned NumElts = Ty->getNumElements();
1591
1592     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1593 Imm = (Imm & 0xff) * 0x01010101;
1594
1595 int Indices[32];
1596 for (unsigned l = 0; l != NumElts; l += 8) {
1597 for (unsigned i = 0; i != 4; ++i)
1598 Indices[l + i] = l + i;
1599 for (unsigned i = 4; i != 8; ++i) {
1600 Indices[l + i] = l + 4 + (Imm & 3);
1601 Imm >>= 2;
1602 }
1603 }
1604
1605 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1606 "pshufhw");
1607 }
1608 case X86::BI__builtin_ia32_pshufd:
1609 case X86::BI__builtin_ia32_pshufd256:
1610 case X86::BI__builtin_ia32_pshufd512:
1611 case X86::BI__builtin_ia32_vpermilpd:
1612 case X86::BI__builtin_ia32_vpermilps:
1613 case X86::BI__builtin_ia32_vpermilpd256:
1614 case X86::BI__builtin_ia32_vpermilps256:
1615 case X86::BI__builtin_ia32_vpermilpd512:
1616 case X86::BI__builtin_ia32_vpermilps512: {
1617     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1618     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1619     unsigned NumElts = Ty->getNumElements();
1620 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1621 unsigned NumLaneElts = NumElts / NumLanes;
1622
1623     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1624 Imm = (Imm & 0xff) * 0x01010101;
1625
1626 int Indices[16];
1627 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1628 for (unsigned i = 0; i != NumLaneElts; ++i) {
1629 Indices[i + l] = (Imm % NumLaneElts) + l;
1630 Imm /= NumLaneElts;
1631 }
1632 }
1633
1634 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1635 "permil");
1636 }
1637 case X86::BI__builtin_ia32_shufpd:
1638 case X86::BI__builtin_ia32_shufpd256:
1639 case X86::BI__builtin_ia32_shufpd512:
1640 case X86::BI__builtin_ia32_shufps:
1641 case X86::BI__builtin_ia32_shufps256:
1642 case X86::BI__builtin_ia32_shufps512: {
1643     uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1644     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1645     unsigned NumElts = Ty->getNumElements();
1646 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1647 unsigned NumLaneElts = NumElts / NumLanes;
1648
1649     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1650 Imm = (Imm & 0xff) * 0x01010101;
1651
1652 int Indices[16];
1653 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1654 for (unsigned i = 0; i != NumLaneElts; ++i) {
1655 unsigned Index = Imm % NumLaneElts;
1656 Imm /= NumLaneElts;
1657 if (i >= (NumLaneElts / 2))
1658 Index += NumElts;
1659 Indices[l + i] = l + Index;
1660 }
1661 }
1662
1663 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1664 ArrayRef(Indices, NumElts), "shufp");
1665 }
1666 case X86::BI__builtin_ia32_permdi256:
1667 case X86::BI__builtin_ia32_permdf256:
1668 case X86::BI__builtin_ia32_permdi512:
1669 case X86::BI__builtin_ia32_permdf512: {
1670     unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1671     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1672     unsigned NumElts = Ty->getNumElements();
1673
1674     // These intrinsics operate on 256-bit lanes of four 64-bit elements.
1675 int Indices[8];
1676 for (unsigned l = 0; l != NumElts; l += 4)
1677 for (unsigned i = 0; i != 4; ++i)
1678 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
1679
1680 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1681 "perm");
1682 }
1683 case X86::BI__builtin_ia32_palignr128:
1684 case X86::BI__builtin_ia32_palignr256:
1685 case X86::BI__builtin_ia32_palignr512: {
1686     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1687
1688     unsigned NumElts =
1689         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1690 assert(NumElts % 16 == 0);
1691
1692     // If palignr is shifting the pair of vectors more than the size of two
1693     // lanes, emit zero.
1694 if (ShiftVal >= 32)
1695       return llvm::Constant::getNullValue(ConvertType(E->getType()));
1696
1697     // If palignr is shifting the pair of input vectors more than one lane,
1698     // but less than two lanes, convert to shifting in zeroes.
1699 if (ShiftVal > 16) {
1700 ShiftVal -= 16;
1701 Ops[1] = Ops[0];
1702 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
1703 }
1704
1705 int Indices[64];
1706
1707 for (unsigned l = 0; l != NumElts; l += 16) {
1708 for (unsigned i = 0; i != 16; ++i) {
1709 unsigned Idx = ShiftVal + i;
1710 if (Idx >= 16)
1711 Idx += NumElts - 16;
1712 Indices[l + i] = Idx + l;
1713 }
1714 }
1715
1716 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1717 ArrayRef(Indices, NumElts), "palignr");
1718 }
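  // [Editor's note] palignr concatenates the two sources and shifts right by
  // the byte immediate, independently per 16-byte lane. Shifts of 32+ bytes
  // give zero; for 17..31 the low source is replaced by zero and the shift
  // reduced by 16, so the shuffle above only ever models a 0..16-byte shift.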
1719 case X86::BI__builtin_ia32_alignd128:
1720 case X86::BI__builtin_ia32_alignd256:
1721 case X86::BI__builtin_ia32_alignd512:
1722 case X86::BI__builtin_ia32_alignq128:
1723 case X86::BI__builtin_ia32_alignq256:
1724 case X86::BI__builtin_ia32_alignq512: {
1725     unsigned NumElts =
1726         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1727     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1728
1729     // Mask the shift amount to width of a vector.
1730 ShiftVal &= NumElts - 1;
1731
1732 int Indices[16];
1733 for (unsigned i = 0; i != NumElts; ++i)
1734 Indices[i] = i + ShiftVal;
1735
1736 return Builder.CreateShuffleVector(Ops[1], Ops[0],
1737 ArrayRef(Indices, NumElts), "valign");
1738 }
1739 case X86::BI__builtin_ia32_shuf_f32x4_256:
1740 case X86::BI__builtin_ia32_shuf_f64x2_256:
1741 case X86::BI__builtin_ia32_shuf_i32x4_256:
1742 case X86::BI__builtin_ia32_shuf_i64x2_256:
1743 case X86::BI__builtin_ia32_shuf_f32x4:
1744 case X86::BI__builtin_ia32_shuf_f64x2:
1745 case X86::BI__builtin_ia32_shuf_i32x4:
1746 case X86::BI__builtin_ia32_shuf_i64x2: {
1747     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1748     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1749     unsigned NumElts = Ty->getNumElements();
1750 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
1751 unsigned NumLaneElts = NumElts / NumLanes;
1752
1753 int Indices[16];
1754 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1755 unsigned Index = (Imm % NumLanes) * NumLaneElts;
1756 Imm /= NumLanes;
1757 if (l >= (NumElts / 2))
1758 Index += NumElts;
1759 for (unsigned i = 0; i != NumLaneElts; ++i) {
1760 Indices[l + i] = Index + i;
1761 }
1762 }
1763
1764 return Builder.CreateShuffleVector(Ops[0], Ops[1],
1765 ArrayRef(Indices, NumElts), "shuf");
1766 }
1767
1768 case X86::BI__builtin_ia32_vperm2f128_pd256:
1769 case X86::BI__builtin_ia32_vperm2f128_ps256:
1770 case X86::BI__builtin_ia32_vperm2f128_si256:
1771 case X86::BI__builtin_ia32_permti256: {
1772     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1773     unsigned NumElts =
1774         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1775
1776
1777
1778
1779
1780
1781 Value *OutOps[2];
1782 int Indices[8];
1783 for (unsigned l = 0; l != 2; ++l) {
1784
1785 if (Imm & (1 << ((l * 4) + 3)))
1786 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
1787 else if (Imm & (1 << ((l * 4) + 1)))
1788 OutOps[l] = Ops[1];
1789 else
1790 OutOps[l] = Ops[0];
1791
1792 for (unsigned i = 0; i != NumElts/2; ++i) {
1793
1794 unsigned Idx = (l * NumElts) + i;
1795
1796
1797 if (Imm & (1 << (l * 4)))
1798 Idx += NumElts/2;
1799 Indices[(l * (NumElts/2)) + i] = Idx;
1800 }
1801 }
1802
1803 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
1804 ArrayRef(Indices, NumElts), "vperm");
1805 }
1806
1807 case X86::BI__builtin_ia32_pslldqi128_byteshift:
1808 case X86::BI__builtin_ia32_pslldqi256_byteshift:
1809 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
1810     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1811     auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1812
1813 unsigned NumElts = VecTy->getNumElements();
1814 Value *Zero = llvm::Constant::getNullValue(VecTy);
1815
1816
1817 if (ShiftVal >= 16)
1818 return Zero;
1819
1820 int Indices[64];
1821
1822 for (unsigned l = 0; l != NumElts; l += 16) {
1823 for (unsigned i = 0; i != 16; ++i) {
1824 unsigned Idx = NumElts + i - ShiftVal;
1825 if (Idx < NumElts)
1826 Idx -= NumElts - 16;
1827 Indices[l + i] = Idx + l;
1828 }
1829 }
1830 return Builder.CreateShuffleVector(Zero, Ops[0], ArrayRef(Indices, NumElts),
1831 "pslldq");
1832 }
1833 case X86::BI__builtin_ia32_psrldqi128_byteshift:
1834 case X86::BI__builtin_ia32_psrldqi256_byteshift:
1835 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
1836     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1837     auto *VecTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
1838
1839 unsigned NumElts = VecTy->getNumElements();
1840 Value *Zero = llvm::Constant::getNullValue(VecTy);
1841
1842
1843 if (ShiftVal >= 16)
1844 return Zero;
1845
1846 int Indices[64];
1847
1848 for (unsigned l = 0; l != NumElts; l += 16) {
1849 for (unsigned i = 0; i != 16; ++i) {
1850 unsigned Idx = i + ShiftVal;
1851 if (Idx >= 16)
1852 Idx += NumElts - 16;
1853 Indices[l + i] = Idx + l;
1854 }
1855 }
1856 return Builder.CreateShuffleVector(Ops[0], Zero, ArrayRef(Indices, NumElts),
1857 "psrldq");
1858 }
1859 case X86::BI__builtin_ia32_kshiftliqi:
1860 case X86::BI__builtin_ia32_kshiftlihi:
1861 case X86::BI__builtin_ia32_kshiftlisi:
1862 case X86::BI__builtin_ia32_kshiftlidi: {
1863     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1864     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1865
1866 if (ShiftVal >= NumElts)
1867 return llvm::Constant::getNullValue(Ops[0]->getType());
1868
1869     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
1870
1871 int Indices[64];
1872 for (unsigned i = 0; i != NumElts; ++i)
1873 Indices[i] = NumElts + i - ShiftVal;
1874
1875 Value *Zero = llvm::Constant::getNullValue(In->getType());
1876     Value *SV = Builder.CreateShuffleVector(
1877         Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
1878 return Builder.CreateBitCast(SV, Ops[0]->getType());
1879 }
1880 case X86::BI__builtin_ia32_kshiftriqi:
1881 case X86::BI__builtin_ia32_kshiftrihi:
1882 case X86::BI__builtin_ia32_kshiftrisi:
1883 case X86::BI__builtin_ia32_kshiftridi: {
1885 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1886
1887 if (ShiftVal >= NumElts)
1888 return llvm::Constant::getNullValue(Ops[0]->getType());
1889
1890     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
1891
1892 int Indices[64];
1893 for (unsigned i = 0; i != NumElts; ++i)
1894 Indices[i] = i + ShiftVal;
1895
1896 Value *Zero = llvm::Constant::getNullValue(In->getType());
1897     Value *SV = Builder.CreateShuffleVector(
1898         In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
1899 return Builder.CreateBitCast(SV, Ops[0]->getType());
1900 }
1901 case X86::BI__builtin_ia32_movnti:
1902 case X86::BI__builtin_ia32_movnti64:
1903 case X86::BI__builtin_ia32_movntsd:
1904 case X86::BI__builtin_ia32_movntss: {
1905 llvm::MDNode *Node = llvm::MDNode::get(
1906         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
1907
1908 Value *Ptr = Ops[0];
1909 Value *Src = Ops[1];
1910
1911
1912 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
1913 BuiltinID == X86::BI__builtin_ia32_movntss)
1914 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
1915
1916
1917 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
1918 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
1919 SI->setAlignment(llvm::Align(1));
1920 return SI;
1921 }
1922
1923 case X86::BI__builtin_ia32_vprotbi:
1924 case X86::BI__builtin_ia32_vprotwi:
1925 case X86::BI__builtin_ia32_vprotdi:
1926 case X86::BI__builtin_ia32_vprotqi:
1927 case X86::BI__builtin_ia32_prold128:
1928 case X86::BI__builtin_ia32_prold256:
1929 case X86::BI__builtin_ia32_prold512:
1930 case X86::BI__builtin_ia32_prolq128:
1931 case X86::BI__builtin_ia32_prolq256:
1932 case X86::BI__builtin_ia32_prolq512:
1933     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
1934   case X86::BI__builtin_ia32_prord128:
1935 case X86::BI__builtin_ia32_prord256:
1936 case X86::BI__builtin_ia32_prord512:
1937 case X86::BI__builtin_ia32_prorq128:
1938 case X86::BI__builtin_ia32_prorq256:
1939 case X86::BI__builtin_ia32_prorq512:
1940     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
1941   case X86::BI__builtin_ia32_selectb_128:
1942 case X86::BI__builtin_ia32_selectb_256:
1943 case X86::BI__builtin_ia32_selectb_512:
1944 case X86::BI__builtin_ia32_selectw_128:
1945 case X86::BI__builtin_ia32_selectw_256:
1946 case X86::BI__builtin_ia32_selectw_512:
1947 case X86::BI__builtin_ia32_selectd_128:
1948 case X86::BI__builtin_ia32_selectd_256:
1949 case X86::BI__builtin_ia32_selectd_512:
1950 case X86::BI__builtin_ia32_selectq_128:
1951 case X86::BI__builtin_ia32_selectq_256:
1952 case X86::BI__builtin_ia32_selectq_512:
1953 case X86::BI__builtin_ia32_selectph_128:
1954 case X86::BI__builtin_ia32_selectph_256:
1955 case X86::BI__builtin_ia32_selectph_512:
1956 case X86::BI__builtin_ia32_selectpbf_128:
1957 case X86::BI__builtin_ia32_selectpbf_256:
1958 case X86::BI__builtin_ia32_selectpbf_512:
1959 case X86::BI__builtin_ia32_selectps_128:
1960 case X86::BI__builtin_ia32_selectps_256:
1961 case X86::BI__builtin_ia32_selectps_512:
1962 case X86::BI__builtin_ia32_selectpd_128:
1963 case X86::BI__builtin_ia32_selectpd_256:
1964 case X86::BI__builtin_ia32_selectpd_512:
1965 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
1966 case X86::BI__builtin_ia32_selectsh_128:
1967 case X86::BI__builtin_ia32_selectsbf_128:
1968 case X86::BI__builtin_ia32_selectss_128:
1969 case X86::BI__builtin_ia32_selectsd_128: {
1970 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
1971 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
1972     A = EmitX86ScalarSelect(*this, Ops[0], A, B);
1973     return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
1974 }
1975 case X86::BI__builtin_ia32_cmpb128_mask:
1976 case X86::BI__builtin_ia32_cmpb256_mask:
1977 case X86::BI__builtin_ia32_cmpb512_mask:
1978 case X86::BI__builtin_ia32_cmpw128_mask:
1979 case X86::BI__builtin_ia32_cmpw256_mask:
1980 case X86::BI__builtin_ia32_cmpw512_mask:
1981 case X86::BI__builtin_ia32_cmpd128_mask:
1982 case X86::BI__builtin_ia32_cmpd256_mask:
1983 case X86::BI__builtin_ia32_cmpd512_mask:
1984 case X86::BI__builtin_ia32_cmpq128_mask:
1985 case X86::BI__builtin_ia32_cmpq256_mask:
1986 case X86::BI__builtin_ia32_cmpq512_mask: {
1987 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
1988 return EmitX86MaskedCompare(*this, CC, true, Ops);
1989 }
1990 case X86::BI__builtin_ia32_ucmpb128_mask:
1991 case X86::BI__builtin_ia32_ucmpb256_mask:
1992 case X86::BI__builtin_ia32_ucmpb512_mask:
1993 case X86::BI__builtin_ia32_ucmpw128_mask:
1994 case X86::BI__builtin_ia32_ucmpw256_mask:
1995 case X86::BI__builtin_ia32_ucmpw512_mask:
1996 case X86::BI__builtin_ia32_ucmpd128_mask:
1997 case X86::BI__builtin_ia32_ucmpd256_mask:
1998 case X86::BI__builtin_ia32_ucmpd512_mask:
1999 case X86::BI__builtin_ia32_ucmpq128_mask:
2000 case X86::BI__builtin_ia32_ucmpq256_mask:
2001 case X86::BI__builtin_ia32_ucmpq512_mask: {
2002 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2003 return EmitX86MaskedCompare(*this, CC, false, Ops);
2004 }
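// [Illustrative example, not part of the original file] The cmp/ucmp builtins
// sit behind the integer compare-to-mask intrinsics; the 3-bit condition code
// selects the icmp predicate (signed for cmp, unsigned for ucmp). A C sketch:
//
//   #include <immintrin.h>
//   __mmask16 lt_mask(__m512i A, __m512i B) {
//     return _mm512_cmplt_epu32_mask(A, B); // ucmp with _MM_CMPINT_LT
//   }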
2005 case X86::BI__builtin_ia32_vpcomb:
2006 case X86::BI__builtin_ia32_vpcomw:
2007 case X86::BI__builtin_ia32_vpcomd:
2008 case X86::BI__builtin_ia32_vpcomq:
2009 return EmitX86vpcom(*this, Ops, true);
2010 case X86::BI__builtin_ia32_vpcomub:
2011 case X86::BI__builtin_ia32_vpcomuw:
2012 case X86::BI__builtin_ia32_vpcomud:
2013 case X86::BI__builtin_ia32_vpcomuq:
2014 return EmitX86vpcom(*this, Ops, false);
2015
2016 case X86::BI__builtin_ia32_kortestcqi:
2017 case X86::BI__builtin_ia32_kortestchi:
2018 case X86::BI__builtin_ia32_kortestcsi:
2019 case X86::BI__builtin_ia32_kortestcdi: {
2020 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2021 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
2022 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2023 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2024 }
2025 case X86::BI__builtin_ia32_kortestzqi:
2026 case X86::BI__builtin_ia32_kortestzhi:
2027 case X86::BI__builtin_ia32_kortestzsi:
2028 case X86::BI__builtin_ia32_kortestzdi: {
2029 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2030 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
2031 Value *Cmp = Builder.CreateICmpEQ(Or, C);
2032 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2033 }
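// [Illustrative example, not part of the original file] kortestz/kortestc
// report whether the OR of two masks is all zeros / all ones. A C sketch:
//
//   #include <immintrin.h>
//   int both_empty(__mmask16 A, __mmask16 B) {
//     return _kortestz_mask16_u8(A, B); // 1 iff (A | B) == 0
//   }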
2034
2035 case X86::BI__builtin_ia32_ktestcqi:
2036 case X86::BI__builtin_ia32_ktestzqi:
2037 case X86::BI__builtin_ia32_ktestchi:
2038 case X86::BI__builtin_ia32_ktestzhi:
2039 case X86::BI__builtin_ia32_ktestcsi:
2040 case X86::BI__builtin_ia32_ktestzsi:
2041 case X86::BI__builtin_ia32_ktestcdi:
2042 case X86::BI__builtin_ia32_ktestzdi: {
2043 Intrinsic::ID IID;
2044 switch (BuiltinID) {
2045 default: llvm_unreachable("Unsupported intrinsic!");
2046 case X86::BI__builtin_ia32_ktestcqi:
2047 IID = Intrinsic::x86_avx512_ktestc_b;
2048 break;
2049 case X86::BI__builtin_ia32_ktestzqi:
2050 IID = Intrinsic::x86_avx512_ktestz_b;
2051 break;
2052 case X86::BI__builtin_ia32_ktestchi:
2053 IID = Intrinsic::x86_avx512_ktestc_w;
2054 break;
2055 case X86::BI__builtin_ia32_ktestzhi:
2056 IID = Intrinsic::x86_avx512_ktestz_w;
2057 break;
2058 case X86::BI__builtin_ia32_ktestcsi:
2059 IID = Intrinsic::x86_avx512_ktestc_d;
2060 break;
2061 case X86::BI__builtin_ia32_ktestzsi:
2062 IID = Intrinsic::x86_avx512_ktestz_d;
2063 break;
2064 case X86::BI__builtin_ia32_ktestcdi:
2065 IID = Intrinsic::x86_avx512_ktestc_q;
2066 break;
2067 case X86::BI__builtin_ia32_ktestzdi:
2068 IID = Intrinsic::x86_avx512_ktestz_q;
2069 break;
2070 }
2071
2072 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2073 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2074 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2075 Function *Intr = CGM.getIntrinsic(IID);
2076 return Builder.CreateCall(Intr, {LHS, RHS});
2077 }
2078
2079 case X86::BI__builtin_ia32_kaddqi:
2080 case X86::BI__builtin_ia32_kaddhi:
2081 case X86::BI__builtin_ia32_kaddsi:
2082 case X86::BI__builtin_ia32_kadddi: {
2083 Intrinsic::ID IID;
2084 switch (BuiltinID) {
2085 default: llvm_unreachable("Unsupported intrinsic!");
2086 case X86::BI__builtin_ia32_kaddqi:
2087 IID = Intrinsic::x86_avx512_kadd_b;
2088 break;
2089 case X86::BI__builtin_ia32_kaddhi:
2090 IID = Intrinsic::x86_avx512_kadd_w;
2091 break;
2092 case X86::BI__builtin_ia32_kaddsi:
2093 IID = Intrinsic::x86_avx512_kadd_d;
2094 break;
2095 case X86::BI__builtin_ia32_kadddi:
2096 IID = Intrinsic::x86_avx512_kadd_q;
2097 break;
2098 }
2099
2100 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2101 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2102 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2103 Function *Intr = CGM.getIntrinsic(IID);
2104 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
2105 return Builder.CreateBitCast(Res, Ops[0]->getType());
2106 }
2107 case X86::BI__builtin_ia32_kandqi:
2108 case X86::BI__builtin_ia32_kandhi:
2109 case X86::BI__builtin_ia32_kandsi:
2110 case X86::BI__builtin_ia32_kanddi:
2111 return EmitX86MaskLogic(*this, Instruction::And, Ops);
2112 case X86::BI__builtin_ia32_kandnqi:
2113 case X86::BI__builtin_ia32_kandnhi:
2114 case X86::BI__builtin_ia32_kandnsi:
2115 case X86::BI__builtin_ia32_kandndi:
2116 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
2117 case X86::BI__builtin_ia32_korqi:
2118 case X86::BI__builtin_ia32_korhi:
2119 case X86::BI__builtin_ia32_korsi:
2120 case X86::BI__builtin_ia32_kordi:
2121 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
2122 case X86::BI__builtin_ia32_kxnorqi:
2123 case X86::BI__builtin_ia32_kxnorhi:
2124 case X86::BI__builtin_ia32_kxnorsi:
2125 case X86::BI__builtin_ia32_kxnordi:
2126 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
2127 case X86::BI__builtin_ia32_kxorqi:
2128 case X86::BI__builtin_ia32_kxorhi:
2129 case X86::BI__builtin_ia32_kxorsi:
2130 case X86::BI__builtin_ia32_kxordi:
2131 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
2132 case X86::BI__builtin_ia32_knotqi:
2133 case X86::BI__builtin_ia32_knothi:
2134 case X86::BI__builtin_ia32_knotsi:
2135 case X86::BI__builtin_ia32_knotdi: {
2136 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2137 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2138 return Builder.CreateBitCast(Builder.CreateNot(Res),
2139 Ops[0]->getType());
2140 }
2141 case X86::BI__builtin_ia32_kmovb:
2142 case X86::BI__builtin_ia32_kmovw:
2143 case X86::BI__builtin_ia32_kmovd:
2144 case X86::BI__builtin_ia32_kmovq: {
2145 // Bitcast to vXi1 type and then back to integer. This gets the mask
2146 // register type into the IR, but might be optimized out depending on
2147 // what's around it.
2148 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2149 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2150 return Builder.CreateBitCast(Res, Ops[0]->getType());
2151 }
2152
2153 case X86::BI__builtin_ia32_kunpckdi:
2154 case X86::BI__builtin_ia32_kunpcksi:
2155 case X86::BI__builtin_ia32_kunpckhi: {
2156 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2157 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2158 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2159 int Indices[64];
2160 for (unsigned i = 0; i != NumElts; ++i)
2161 Indices[i] = i;
2162
2163 // First extract half of each vector. This gives better codegen than
2164 // doing it in a single shuffle.
2165 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2166 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2167
2168 // Concat the vectors. NOTE: Operands are swapped to match the intrinsic.
2169 Value *Res =
2170 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2171 return Builder.CreateBitCast(Res, Ops[0]->getType());
2172 }
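// [Illustrative example, not part of the original file] kunpck concatenates
// the low halves of two masks, with the second operand landing in the low
// bits, which is why the shuffle above puts RHS first. A C sketch:
//
//   #include <immintrin.h>
//   __mmask16 unpack(__mmask16 A, __mmask16 B) {
//     return _mm512_kunpackb(A, B); // dst[7:0] = B[7:0], dst[15:8] = A[7:0]
//   }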
2173
2174 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2175 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2176 case X86::BI__builtin_ia32_sqrtss_round_mask: {
2177 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
2178 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2179 // otherwise keep the intrinsic.
2180 if (CC != 4) {
2181 Intrinsic::ID IID;
2182
2183 switch (BuiltinID) {
2184 default:
2185 llvm_unreachable("Unsupported intrinsic!");
2186 case X86::BI__builtin_ia32_sqrtsh_round_mask:
2187 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
2188 break;
2189 case X86::BI__builtin_ia32_sqrtsd_round_mask:
2190 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
2191 break;
2192 case X86::BI__builtin_ia32_sqrtss_round_mask:
2193 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
2194 break;
2195 }
2196 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2197 }
2198 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2199 Function *F;
2200 if (Builder.getIsFPConstrained()) {
2201 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2202 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2203 A->getType());
2204 A = Builder.CreateConstrainedFPCall(F, A);
2205 } else {
2206 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2207 A = Builder.CreateCall(F, A);
2208 }
2209 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2210 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
2211 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2212 }
2213 case X86::BI__builtin_ia32_sqrtph512:
2214 case X86::BI__builtin_ia32_sqrtps512:
2215 case X86::BI__builtin_ia32_sqrtpd512: {
2216 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2217 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2218 // otherwise keep the intrinsic.
2219 if (CC != 4) {
2220 Intrinsic::ID IID;
2221
2222 switch (BuiltinID) {
2223 default:
2224 llvm_unreachable("Unsupported intrinsic!");
2225 case X86::BI__builtin_ia32_sqrtph512:
2226 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2227 break;
2228 case X86::BI__builtin_ia32_sqrtps512:
2229 IID = Intrinsic::x86_avx512_sqrt_ps_512;
2230 break;
2231 case X86::BI__builtin_ia32_sqrtpd512:
2232 IID = Intrinsic::x86_avx512_sqrt_pd_512;
2233 break;
2234 }
2235 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2236 }
2237 if (Builder.getIsFPConstrained()) {
2238 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2239 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2240 Ops[0]->getType());
2241 return Builder.CreateConstrainedFPCall(F, Ops[0]);
2242 } else {
2243 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
2244 return Builder.CreateCall(F, Ops[0]);
2245 }
2246 }
2247
2248 case X86::BI__builtin_ia32_pmuludq128:
2249 case X86::BI__builtin_ia32_pmuludq256:
2250 case X86::BI__builtin_ia32_pmuludq512:
2251 return EmitX86Muldq(*this, false, Ops);
2252
2253 case X86::BI__builtin_ia32_pmuldq128:
2254 case X86::BI__builtin_ia32_pmuldq256:
2255 case X86::BI__builtin_ia32_pmuldq512:
2256 return EmitX86Muldq(*this, true, Ops);
2257
2258 case X86::BI__builtin_ia32_pternlogd512_mask:
2259 case X86::BI__builtin_ia32_pternlogq512_mask:
2260 case X86::BI__builtin_ia32_pternlogd128_mask:
2261 case X86::BI__builtin_ia32_pternlogd256_mask:
2262 case X86::BI__builtin_ia32_pternlogq128_mask:
2263 case X86::BI__builtin_ia32_pternlogq256_mask:
2264 return EmitX86Ternlog(*this, false, Ops);
2265
2266 case X86::BI__builtin_ia32_pternlogd512_maskz:
2267 case X86::BI__builtin_ia32_pternlogq512_maskz:
2268 case X86::BI__builtin_ia32_pternlogd128_maskz:
2269 case X86::BI__builtin_ia32_pternlogd256_maskz:
2270 case X86::BI__builtin_ia32_pternlogq128_maskz:
2271 case X86::BI__builtin_ia32_pternlogq256_maskz:
2272 return EmitX86Ternlog(*this, true, Ops);
2273
2274 case X86::BI__builtin_ia32_vpshldd128:
2275 case X86::BI__builtin_ia32_vpshldd256:
2276 case X86::BI__builtin_ia32_vpshldd512:
2277 case X86::BI__builtin_ia32_vpshldq128:
2278 case X86::BI__builtin_ia32_vpshldq256:
2279 case X86::BI__builtin_ia32_vpshldq512:
2280 case X86::BI__builtin_ia32_vpshldw128:
2281 case X86::BI__builtin_ia32_vpshldw256:
2282 case X86::BI__builtin_ia32_vpshldw512:
2283 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2284
2285 case X86::BI__builtin_ia32_vpshrdd128:
2286 case X86::BI__builtin_ia32_vpshrdd256:
2287 case X86::BI__builtin_ia32_vpshrdd512:
2288 case X86::BI__builtin_ia32_vpshrdq128:
2289 case X86::BI__builtin_ia32_vpshrdq256:
2290 case X86::BI__builtin_ia32_vpshrdq512:
2291 case X86::BI__builtin_ia32_vpshrdw128:
2292 case X86::BI__builtin_ia32_vpshrdw256:
2293 case X86::BI__builtin_ia32_vpshrdw512:
2294 // Ops 0 and 1 are swapped.
2295 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2296
2297
2298 case X86::BI__builtin_ia32_reduce_fadd_pd512:
2299 case X86::BI__builtin_ia32_reduce_fadd_ps512:
2300 case X86::BI__builtin_ia32_reduce_fadd_ph512:
2301 case X86::BI__builtin_ia32_reduce_fadd_ph256:
2302 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
2303 Function *F =
2304 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
2305 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2306 Builder.getFastMathFlags().setAllowReassoc();
2307 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2308 }
2309 case X86::BI__builtin_ia32_reduce_fmul_pd512:
2310 case X86::BI__builtin_ia32_reduce_fmul_ps512:
2311 case X86::BI__builtin_ia32_reduce_fmul_ph512:
2312 case X86::BI__builtin_ia32_reduce_fmul_ph256:
2313 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
2314 Function *F =
2315 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
2316 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2317 Builder.getFastMathFlags().setAllowReassoc();
2318 return Builder.CreateCall(F, {Ops[0], Ops[1]});
2319 }
2320 case X86::BI__builtin_ia32_reduce_fmax_pd512:
2321 case X86::BI__builtin_ia32_reduce_fmax_ps512:
2322 case X86::BI__builtin_ia32_reduce_fmax_ph512:
2323 case X86::BI__builtin_ia32_reduce_fmax_ph256:
2324 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
2325 Function *F =
2326 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
2327 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2328 Builder.getFastMathFlags().setNoNaNs();
2329 return Builder.CreateCall(F, {Ops[0]});
2330 }
2331 case X86::BI__builtin_ia32_reduce_fmin_pd512:
2332 case X86::BI__builtin_ia32_reduce_fmin_ps512:
2333 case X86::BI__builtin_ia32_reduce_fmin_ph512:
2334 case X86::BI__builtin_ia32_reduce_fmin_ph256:
2335 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
2336 Function *F =
2337 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
2338 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2339 Builder.getFastMathFlags().setNoNaNs();
2340 return Builder.CreateCall(F, {Ops[0]});
2341 }
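// [Illustrative example, not part of the original file] The reduce builtins
// back the _mm512_reduce_* helpers; the allow-reassoc flag set above lets
// the backend use a tree reduction. A C sketch:
//
//   #include <immintrin.h>
//   float sum(__m512 V) {
//     return _mm512_reduce_add_ps(V); // llvm.vector.reduce.fadd, reassoc
//   }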
2342
2343 case X86::BI__builtin_ia32_rdrand16_step:
2344 case X86::BI__builtin_ia32_rdrand32_step:
2345 case X86::BI__builtin_ia32_rdrand64_step:
2346 case X86::BI__builtin_ia32_rdseed16_step:
2347 case X86::BI__builtin_ia32_rdseed32_step:
2348 case X86::BI__builtin_ia32_rdseed64_step: {
2349 Intrinsic::ID ID;
2350 switch (BuiltinID) {
2351 default: llvm_unreachable("Unsupported intrinsic!");
2352 case X86::BI__builtin_ia32_rdrand16_step:
2353 ID = Intrinsic::x86_rdrand_16;
2354 break;
2355 case X86::BI__builtin_ia32_rdrand32_step:
2356 ID = Intrinsic::x86_rdrand_32;
2357 break;
2358 case X86::BI__builtin_ia32_rdrand64_step:
2359 ID = Intrinsic::x86_rdrand_64;
2360 break;
2361 case X86::BI__builtin_ia32_rdseed16_step:
2362 ID = Intrinsic::x86_rdseed_16;
2363 break;
2364 case X86::BI__builtin_ia32_rdseed32_step:
2365 ID = Intrinsic::x86_rdseed_32;
2366 break;
2367 case X86::BI__builtin_ia32_rdseed64_step:
2368 ID = Intrinsic::x86_rdseed_64;
2369 break;
2370 }
2371
2372 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2373 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
2374 Ops[0]);
2375 return Builder.CreateExtractValue(Call, 1);
2376 }
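// [Illustrative example, not part of the original file] The *_step builtins
// return the carry flag and store the random value through the pointer:
//
//   #include <immintrin.h>
//   int get_random(unsigned *Out) {
//     return _rdrand32_step(Out); // 1 on success, 0 if no entropy ready
//   }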
2377 case X86::BI__builtin_ia32_addcarryx_u32:
2378 case X86::BI__builtin_ia32_addcarryx_u64:
2379 case X86::BI__builtin_ia32_subborrow_u32:
2380 case X86::BI__builtin_ia32_subborrow_u64: {
2381 Intrinsic::ID IID;
2382 switch (BuiltinID) {
2383 default: llvm_unreachable("Unsupported intrinsic!");
2384 case X86::BI__builtin_ia32_addcarryx_u32:
2385 IID = Intrinsic::x86_addcarry_32;
2386 break;
2387 case X86::BI__builtin_ia32_addcarryx_u64:
2388 IID = Intrinsic::x86_addcarry_64;
2389 break;
2390 case X86::BI__builtin_ia32_subborrow_u32:
2391 IID = Intrinsic::x86_subborrow_32;
2392 break;
2393 case X86::BI__builtin_ia32_subborrow_u64:
2394 IID = Intrinsic::x86_subborrow_64;
2395 break;
2396 }
2397
2398 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
2399 { Ops[0], Ops[1], Ops[2] });
2400 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
2401 Ops[3]);
2402 return Builder.CreateExtractValue(Call, 0);
2403 }
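// [Illustrative example, not part of the original file] The carry output of
// one step feeds the carry input of the next, so multi-word adds chain
// naturally. A C sketch, assuming an x86-64 target:
//
//   #include <immintrin.h>
//   void add128(const unsigned long long A[2], const unsigned long long B[2],
//               unsigned long long S[2]) {
//     unsigned char c = _addcarry_u64(0, A[0], B[0], &S[0]);
//     (void)_addcarry_u64(c, A[1], B[1], &S[1]);
//   }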
2404
2405 case X86::BI__builtin_ia32_fpclassps128_mask:
2406 case X86::BI__builtin_ia32_fpclassps256_mask:
2407 case X86::BI__builtin_ia32_fpclassps512_mask:
2408 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2409 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2410 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2411 case X86::BI__builtin_ia32_fpclassph128_mask:
2412 case X86::BI__builtin_ia32_fpclassph256_mask:
2413 case X86::BI__builtin_ia32_fpclassph512_mask:
2414 case X86::BI__builtin_ia32_fpclasspd128_mask:
2415 case X86::BI__builtin_ia32_fpclasspd256_mask:
2416 case X86::BI__builtin_ia32_fpclasspd512_mask: {
2417 unsigned NumElts =
2418 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2419 Value *MaskIn = Ops[2];
2420 Ops.erase(&Ops[2]);
2421
2422 Intrinsic::ID ID;
2423 switch (BuiltinID) {
2424 default: llvm_unreachable("Unsupported intrinsic!");
2425 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2426 ID = Intrinsic::x86_avx10_fpclass_bf16_128;
2427 break;
2428 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2429 ID = Intrinsic::x86_avx10_fpclass_bf16_256;
2430 break;
2431 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2432 ID = Intrinsic::x86_avx10_fpclass_bf16_512;
2433 break;
2434 case X86::BI__builtin_ia32_fpclassph128_mask:
2435 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
2436 break;
2437 case X86::BI__builtin_ia32_fpclassph256_mask:
2438 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
2439 break;
2440 case X86::BI__builtin_ia32_fpclassph512_mask:
2441 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
2442 break;
2443 case X86::BI__builtin_ia32_fpclassps128_mask:
2444 ID = Intrinsic::x86_avx512_fpclass_ps_128;
2445 break;
2446 case X86::BI__builtin_ia32_fpclassps256_mask:
2447 ID = Intrinsic::x86_avx512_fpclass_ps_256;
2448 break;
2449 case X86::BI__builtin_ia32_fpclassps512_mask:
2450 ID = Intrinsic::x86_avx512_fpclass_ps_512;
2451 break;
2452 case X86::BI__builtin_ia32_fpclasspd128_mask:
2453 ID = Intrinsic::x86_avx512_fpclass_pd_128;
2454 break;
2455 case X86::BI__builtin_ia32_fpclasspd256_mask:
2456 ID = Intrinsic::x86_avx512_fpclass_pd_256;
2457 break;
2458 case X86::BI__builtin_ia32_fpclasspd512_mask:
2459 ID = Intrinsic::x86_avx512_fpclass_pd_512;
2460 break;
2461 }
2462
2463 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2464 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
2465 }
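// [Illustrative example, not part of the original file] fpclass tests each
// lane against categories selected by the immediate (bit 0 QNaN, 1 +0, 2 -0,
// 3 +Inf, 4 -Inf, 5 denormal, 6 negative finite, 7 SNaN). A C sketch:
//
//   #include <immintrin.h>
//   __mmask16 nan_lanes(__m512 V) {
//     return _mm512_fpclass_ps_mask(V, 0x81); // QNaN | SNaN
//   }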
2466
2467 case X86::BI__builtin_ia32_vp2intersect_q_512:
2468 case X86::BI__builtin_ia32_vp2intersect_q_256:
2469 case X86::BI__builtin_ia32_vp2intersect_q_128:
2470 case X86::BI__builtin_ia32_vp2intersect_d_512:
2471 case X86::BI__builtin_ia32_vp2intersect_d_256:
2472 case X86::BI__builtin_ia32_vp2intersect_d_128: {
2473 unsigned NumElts =
2474 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2475 Intrinsic::ID ID;
2476
2477 switch (BuiltinID) {
2478 default: llvm_unreachable("Unsupported intrinsic!");
2479 case X86::BI__builtin_ia32_vp2intersect_q_512:
2480 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
2481 break;
2482 case X86::BI__builtin_ia32_vp2intersect_q_256:
2483 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
2484 break;
2485 case X86::BI__builtin_ia32_vp2intersect_q_128:
2486 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
2487 break;
2488 case X86::BI__builtin_ia32_vp2intersect_d_512:
2489 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
2490 break;
2491 case X86::BI__builtin_ia32_vp2intersect_d_256:
2492 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
2493 break;
2494 case X86::BI__builtin_ia32_vp2intersect_d_128:
2495 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
2496 break;
2497 }
2498
2499 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
2500 Value *Result = Builder.CreateExtractValue(Call, 0);
2501 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2502 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
2503
2504 Result = Builder.CreateExtractValue(Call, 1);
2505 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2506 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
2507 }
2508
2509 case X86::BI__builtin_ia32_vpmultishiftqb128:
2510 case X86::BI__builtin_ia32_vpmultishiftqb256:
2511 case X86::BI__builtin_ia32_vpmultishiftqb512: {
2512 Intrinsic::ID ID;
2513 switch (BuiltinID) {
2514 default: llvm_unreachable("Unsupported intrinsic!");
2515 case X86::BI__builtin_ia32_vpmultishiftqb128:
2516 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
2517 break;
2518 case X86::BI__builtin_ia32_vpmultishiftqb256:
2519 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
2520 break;
2521 case X86::BI__builtin_ia32_vpmultishiftqb512:
2522 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
2523 break;
2524 }
2525
2526 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2527 }
2528
2529 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2530 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2531 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
2532 unsigned NumElts =
2533 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2534 Value *MaskIn = Ops[2];
2535 Ops.erase(&Ops[2]);
2536
2537 Intrinsic::ID ID;
2538 switch (BuiltinID) {
2539 default: llvm_unreachable("Unsupported intrinsic!");
2540 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2541 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2542 break;
2543 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2544 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2545 break;
2546 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
2547 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2548 break;
2549 }
2550
2551 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2552 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
2553 }
2554
2555 // SSE packed comparison intrinsics
2556 case X86::BI__builtin_ia32_cmpeqps:
2557 case X86::BI__builtin_ia32_cmpeqpd:
2558 return getVectorFCmpIR(CmpInst::FCMP_OEQ, false);
2559 case X86::BI__builtin_ia32_cmpltps:
2560 case X86::BI__builtin_ia32_cmpltpd:
2561 return getVectorFCmpIR(CmpInst::FCMP_OLT, true);
2562 case X86::BI__builtin_ia32_cmpleps:
2563 case X86::BI__builtin_ia32_cmplepd:
2564 return getVectorFCmpIR(CmpInst::FCMP_OLE, true);
2565 case X86::BI__builtin_ia32_cmpunordps:
2566 case X86::BI__builtin_ia32_cmpunordpd:
2567 return getVectorFCmpIR(CmpInst::FCMP_UNO, false);
2568 case X86::BI__builtin_ia32_cmpneqps:
2569 case X86::BI__builtin_ia32_cmpneqpd:
2570 return getVectorFCmpIR(CmpInst::FCMP_UNE, false);
2571 case X86::BI__builtin_ia32_cmpnltps:
2572 case X86::BI__builtin_ia32_cmpnltpd:
2573 return getVectorFCmpIR(CmpInst::FCMP_UGE, true);
2574 case X86::BI__builtin_ia32_cmpnleps:
2575 case X86::BI__builtin_ia32_cmpnlepd:
2576 return getVectorFCmpIR(CmpInst::FCMP_UGT, true);
2577 case X86::BI__builtin_ia32_cmpordps:
2578 case X86::BI__builtin_ia32_cmpordpd:
2579 return getVectorFCmpIR(CmpInst::FCMP_ORD, false);
2580 case X86::BI__builtin_ia32_cmpph128_mask:
2581 case X86::BI__builtin_ia32_cmpph256_mask:
2582 case X86::BI__builtin_ia32_cmpph512_mask:
2583 case X86::BI__builtin_ia32_cmpps128_mask:
2584 case X86::BI__builtin_ia32_cmpps256_mask:
2585 case X86::BI__builtin_ia32_cmpps512_mask:
2586 case X86::BI__builtin_ia32_cmppd128_mask:
2587 case X86::BI__builtin_ia32_cmppd256_mask:
2588 case X86::BI__builtin_ia32_cmppd512_mask:
2589 case X86::BI__builtin_ia32_vcmpbf16512_mask:
2590 case X86::BI__builtin_ia32_vcmpbf16256_mask:
2591 case X86::BI__builtin_ia32_vcmpbf16128_mask:
2592 IsMaskFCmp = true;
2593 [[fallthrough]];
2594 case X86::BI__builtin_ia32_cmpps:
2595 case X86::BI__builtin_ia32_cmpps256:
2596 case X86::BI__builtin_ia32_cmppd:
2597 case X86::BI__builtin_ia32_cmppd256: {
2598 // The third argument is the comparison condition, an integer in the
2599 // range [0, 31]. The low four bits select the FP predicate and bit 4
2600 // inverts the signalling behaviour: conditions 16-31 repeat the
2601 // predicates of 0-15 with signalling flipped.
2602
2603
2604
2605 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
2606
2607 // Lowering to IR fcmp instruction, ignoring requested signaling
2608 // behaviour, e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated into
2609 // FCMP_OGT; IsSignaling records the difference for strict FP.
2610 FCmpInst::Predicate Pred;
2611 bool IsSignaling;
2612 // Predicates for 16-31 repeat the 0-15 predicates. Only the signaling
2613 // behavior is inverted. We'll handle that after the switch.
2614 switch (CC & 0xf) {
2615 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
2616 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
2617 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
2618 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
2619 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
2620 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
2621 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
2622 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
2623 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
2624 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
2625 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
2626 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
2627 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
2628 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
2629 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
2630 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
2631 default: llvm_unreachable("Unhandled CC");
2632 }
2633
2634 // Invert the signalling behavior for 16-31.
2635 if (CC & 0x10)
2636 IsSignaling = !IsSignaling;
2637
2638 // If the predicate is true or false and we're using constrained
2639 // intrinsics, we don't have a compare intrinsic we can use. Just use
2640 // the legacy X86 specific intrinsic.
2641 // If the intrinsic is mask enabled and we're using constrained
2642 // intrinsics, use the legacy X86 specific intrinsic.
2643 if (Builder.getIsFPConstrained() &&
2644 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
2645 IsMaskFCmp)) {
2646
2647 Intrinsic::ID IID;
2648 switch (BuiltinID) {
2649 default: llvm_unreachable("Unexpected builtin");
2650 case X86::BI__builtin_ia32_cmpps:
2651 IID = Intrinsic::x86_sse_cmp_ps;
2652 break;
2653 case X86::BI__builtin_ia32_cmpps256:
2654 IID = Intrinsic::x86_avx_cmp_ps_256;
2655 break;
2656 case X86::BI__builtin_ia32_cmppd:
2657 IID = Intrinsic::x86_sse2_cmp_pd;
2658 break;
2659 case X86::BI__builtin_ia32_cmppd256:
2660 IID = Intrinsic::x86_avx_cmp_pd_256;
2661 break;
2662 case X86::BI__builtin_ia32_cmpph128_mask:
2663 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
2664 break;
2665 case X86::BI__builtin_ia32_cmpph256_mask:
2666 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
2667 break;
2668 case X86::BI__builtin_ia32_cmpph512_mask:
2669 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
2670 break;
2671 case X86::BI__builtin_ia32_cmpps512_mask:
2672 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2673 break;
2674 case X86::BI__builtin_ia32_cmppd512_mask:
2675 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2676 break;
2677 case X86::BI__builtin_ia32_cmpps128_mask:
2678 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2679 break;
2680 case X86::BI__builtin_ia32_cmpps256_mask:
2681 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2682 break;
2683 case X86::BI__builtin_ia32_cmppd128_mask:
2684 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2685 break;
2686 case X86::BI__builtin_ia32_cmppd256_mask:
2687 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2688 break;
2689 }
2690
2691 Function *Intr = CGM.getIntrinsic(IID);
2692 if (IsMaskFCmp) {
2693 unsigned NumElts =
2694 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2695 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
2696 Value *Cmp = Builder.CreateCall(Intr, Ops);
2697 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
2698 }
2699
2700 return Builder.CreateCall(Intr, Ops);
2701 }
2702
2703 // Builtins without the _mask suffix return a vector of integers
2704 // of the same width as the input vectors.
2705 if (IsMaskFCmp) {
2706 // We ignore SAE if strict FP is disabled. We only keep precise
2707 // exception behavior under strict FP.
2708 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
2709 // object will be required.
2710 unsigned NumElts =
2711 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2712 Value *Cmp;
2713 if (IsSignaling)
2714 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
2715 else
2716 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
2717 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
2718 }
2719
2720 return getVectorFCmpIR(Pred, IsSignaling);
2721 }
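// [Illustrative example, not part of the original file] The five-bit
// condition maps the _CMP_* macros onto the predicate table above. A C
// sketch, assuming an AVX target:
//
//   #include <immintrin.h>
//   __m128 lt(__m128 A, __m128 B) {
//     return _mm_cmp_ps(A, B, _CMP_LT_OS); // CC=0x01 -> FCMP_OLT, signaling
//   }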
2722
2723 // SSE scalar comparison intrinsics
2724 case X86::BI__builtin_ia32_cmpeqss:
2725 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
2726 case X86::BI__builtin_ia32_cmpltss:
2727 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
2728 case X86::BI__builtin_ia32_cmpless:
2729 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
2730 case X86::BI__builtin_ia32_cmpunordss:
2731 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
2732 case X86::BI__builtin_ia32_cmpneqss:
2733 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
2734 case X86::BI__builtin_ia32_cmpnltss:
2735 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
2736 case X86::BI__builtin_ia32_cmpnless:
2737 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
2738 case X86::BI__builtin_ia32_cmpordss:
2739 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
2740 case X86::BI__builtin_ia32_cmpeqsd:
2741 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
2742 case X86::BI__builtin_ia32_cmpltsd:
2743 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
2744 case X86::BI__builtin_ia32_cmplesd:
2745 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
2746 case X86::BI__builtin_ia32_cmpunordsd:
2747 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
2748 case X86::BI__builtin_ia32_cmpneqsd:
2749 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
2750 case X86::BI__builtin_ia32_cmpnltsd:
2751 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
2752 case X86::BI__builtin_ia32_cmpnlesd:
2753 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
2754 case X86::BI__builtin_ia32_cmpordsd:
2755 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
2756
2757
2758 case X86::BI__builtin_ia32_vcvtph2ps_mask:
2759 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
2760 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
2761 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2762 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
2763 }
2764
2765
2766 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
2767 Ops[2] = getMaskVecValue(
2768 *this, Ops[2],
2769 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
2770 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
2771 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2772 }
2773
2774 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2775 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
2776 Intrinsic::ID IID;
2777 switch (BuiltinID) {
2778 default: llvm_unreachable("Unsupported intrinsic!");
2779 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2780 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
2781 break;
2782 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
2783 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
2784 break;
2785 }
2786 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
2787 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
2788 }
2789
2790 case X86::BI__cpuid:
2791 case X86::BI__cpuidex: {
2792 Value *FuncId = EmitScalarExpr(E->getArg(1));
2793 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
2794 ? EmitScalarExpr(E->getArg(2))
2795 : llvm::ConstantInt::get(Int32Ty, 0);
2796
2797 llvm::StructType *CpuidRetTy =
2798 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
2799 llvm::FunctionType *FTy =
2800 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
2801
2802 StringRef Asm, Constraints;
2803 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2804 Asm = "cpuid";
2805 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
2806 } else {
2807 // x86-64 uses %rbx as the base register, so preserve it.
2808 Asm = "xchgq %rbx, ${1:q}\n"
2809 "cpuid\n"
2810 "xchgq %rbx, ${1:q}";
2811 Constraints = "={ax},=r,={cx},={dx},0,2";
2812 }
2813
2814 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
2815 /*hasSideEffects=*/false);
2816 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
2817 Value *BasePtr = EmitScalarExpr(E->getArg(0));
2818 Value *Store = nullptr;
2819 for (unsigned i = 0; i < 4; i++) {
2820 Value *Extracted = Builder.CreateExtractValue(IACall, i);
2821 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
2822 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
2823 }
2824
2825 // Return the last store instruction to signal that we have emitted
2826 // the intrinsic.
2827 return Store;
2828 }
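// [Illustrative example, not part of the original file] The MSVC-style
// helpers store EAX, EBX, ECX, EDX into a four-element array:
//
//   int Regs[4];
//   __cpuid(Regs, 0);      // leaf 0: max leaf + vendor string
//   __cpuidex(Regs, 7, 0); // leaf 7, subleaf 0: extended feature flags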
2829
2830 case X86::BI__emul:
2831 case X86::BI__emulu: {
2832 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
2833 bool isSigned = (BuiltinID == X86::BI__emul);
2834 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
2835 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
2836 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
2837 }
2838 case X86::BI__mulh:
2839 case X86::BI__umulh:
2840 case X86::BI_mul128:
2841 case X86::BI_umul128: {
2842 llvm::Type *ResType = ConvertType(E->getType());
2843 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
2844
2845 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
2846 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
2847 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
2848
2849 Value *MulResult, *HigherBits;
2850 if (IsSigned) {
2851 MulResult = Builder.CreateNSWMul(LHS, RHS);
2852 HigherBits = Builder.CreateAShr(MulResult, 64);
2853 } else {
2854 MulResult = Builder.CreateNUWMul(LHS, RHS);
2855 HigherBits = Builder.CreateLShr(MulResult, 64);
2856 }
2857 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
2858
2859 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
2860 return HigherBits;
2861
2862 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
2863 Builder.CreateStore(HigherBits, HighBitsAddress);
2864 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
2865 }
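// [Illustrative example, not part of the original file] The product is formed
// in i128 and split into halves, matching the MSVC intrinsics:
//
//   unsigned long long mul_hi(unsigned long long A, unsigned long long B) {
//     unsigned long long Hi;
//     (void)_umul128(A, B, &Hi); // full 64x64 -> 128 product
//     return Hi;                 // same value __umulh(A, B) returns
//   }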
2866
2867 case X86::BI__faststorefence: {
2868 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
2869 llvm::SyncScope::System);
2870 }
2871 case X86::BI__shiftleft128:
2872 case X86::BI__shiftright128: {
2873 llvm::Function *F = CGM.getIntrinsic(
2874 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
2875 Int64Ty);
2876 // Flip low/high ops and zero-extend amount to matching type.
2877 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
2878 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
2879 std::swap(Ops[0], Ops[1]);
2880 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2881 return Builder.CreateCall(F, Ops);
2882 }
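// [Illustrative example, not part of the original file] Shifting a 128-bit
// value made of two 64-bit halves, via the MSVC-style intrinsic:
//
//   unsigned long long high_after_shl(unsigned long long Lo,
//                                     unsigned long long Hi) {
//     return __shiftleft128(Lo, Hi, 4); // high 64 bits of (Hi:Lo) << 4
//   }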
2883 case X86::BI_ReadWriteBarrier:
2884 case X86::BI_ReadBarrier:
2885 case X86::BI_WriteBarrier: {
2886 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
2887 llvm::SyncScope::SingleThread);
2888 }
2889
2890 case X86::BI_AddressOfReturnAddress: {
2891 Function *F =
2892 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
2893 return Builder.CreateCall(F);
2894 }
2895 case X86::BI__stosb: {
2896 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
2897 // instruction, but it will create a memset that won't be optimized away.
2898 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
2899 }
2900 case X86::BI__ud2:
2901 // llvm.trap makes a ud2a instruction on x86.
2902 return EmitTrapCall(Intrinsic::trap);
2903 case X86::BI__int2c: {
2904 // This syscall signals a driver assertion failure in x86 NT kernels.
2905 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
2906 llvm::InlineAsm *IA =
2907 llvm::InlineAsm::get(FTy, "int 0x2c", "", true);
2908 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2909 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2910 llvm::Attribute::NoReturn);
2911 llvm::CallInst *CI = Builder.CreateCall(IA);
2912 CI->setAttributes(NoReturnAttr);
2913 return CI;
2914 }
2915 case X86::BI__readfsbyte:
2916 case X86::BI__readfsword:
2917 case X86::BI__readfsdword:
2918 case X86::BI__readfsqword: {
2919 llvm::Type *IntTy = ConvertType(E->getType());
2920 Value *Ptr = Builder.CreateIntToPtr(
2921 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
2922 LoadInst *Load = Builder.CreateAlignedLoad(
2923 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
2924 Load->setVolatile(true);
2925 return Load;
2926 }
2927 case X86::BI__readgsbyte:
2928 case X86::BI__readgsword:
2929 case X86::BI__readgsdword:
2930 case X86::BI__readgsqword: {
2931 llvm::Type *IntTy = ConvertType(E->getType());
2932 Value *Ptr = Builder.CreateIntToPtr(
2933 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
2934 LoadInst *Load = Builder.CreateAlignedLoad(
2935 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
2936 Load->setVolatile(true);
2937 return Load;
2938 }
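// [Illustrative example, not part of the original file] Address spaces 256
// (GS) and 257 (FS) model the segment-relative loads; on x64 Windows GS
// points at the TEB, so for instance:
//
//   unsigned long long teb_self = __readgsqword(0x30); // TEB.NtTib.Self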
2939 case X86::BI__builtin_ia32_encodekey128_u32: {
2940 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
2941
2942 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
2943
2944 for (int i = 0; i < 3; ++i) {
2945 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
2946 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
2947 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
2948 }
2949
2950 return Builder.CreateExtractValue(Call, 0);
2951 }
2952 case X86::BI__builtin_ia32_encodekey256_u32: {
2953 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
2954
2955 Value *Call =
2956 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
2957
2958 for (int i = 0; i < 4; ++i) {
2959 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
2960 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
2961 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
2962 }
2963
2964 return Builder.CreateExtractValue(Call, 0);
2965 }
2966 case X86::BI__builtin_ia32_aesenc128kl_u8:
2967 case X86::BI__builtin_ia32_aesdec128kl_u8:
2968 case X86::BI__builtin_ia32_aesenc256kl_u8:
2969 case X86::BI__builtin_ia32_aesdec256kl_u8: {
2970 Intrinsic::ID IID;
2971 StringRef BlockName;
2972 switch (BuiltinID) {
2973 default:
2974 llvm_unreachable("Unexpected builtin");
2975 case X86::BI__builtin_ia32_aesenc128kl_u8:
2976 IID = Intrinsic::x86_aesenc128kl;
2977 BlockName = "aesenc128kl";
2978 break;
2979 case X86::BI__builtin_ia32_aesdec128kl_u8:
2980 IID = Intrinsic::x86_aesdec128kl;
2981 BlockName = "aesdec128kl";
2982 break;
2983 case X86::BI__builtin_ia32_aesenc256kl_u8:
2984 IID = Intrinsic::x86_aesenc256kl;
2985 BlockName = "aesenc256kl";
2986 break;
2987 case X86::BI__builtin_ia32_aesdec256kl_u8:
2988 IID = Intrinsic::x86_aesdec256kl;
2989 BlockName = "aesdec256kl";
2990 break;
2991 }
2992
2993 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
2994
2995 BasicBlock *NoError =
2996 createBasicBlock(BlockName + "_no_error", this->CurFn);
2997 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
2998 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
2999
3000 Value *Ret = Builder.CreateExtractValue(Call, 0);
3001 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3002 Value *Out = Builder.CreateExtractValue(Call, 1);
3003 Builder.CreateCondBr(Succ, NoError, Error);
3004
3005 Builder.SetInsertPoint(NoError);
3006 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
3007 Builder.CreateBr(End);
3008
3009 Builder.SetInsertPoint(Error);
3010 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3011 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
3012 Builder.CreateBr(End);
3013
3014 Builder.SetInsertPoint(End);
3015 return Builder.CreateExtractValue(Call, 0);
3016 }
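// [Illustrative example, not part of the original file] The Key Locker
// builtins return a success flag and write the transformed block only on
// success, which is why the lowering above branches on the flag. A C sketch:
//
//   #include <immintrin.h>
//   int enc(__m128i *Out, __m128i Data, const void *Handle) {
//     return _mm_aesenc128kl_u8(Out, Data, Handle); // nonzero on success
//   }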
3017 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3018 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3019 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3020 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
3021 Intrinsic::ID IID;
3022 StringRef BlockName;
3023 switch (BuiltinID) {
3024 case X86::BI__builtin_ia32_aesencwide128kl_u8:
3025 IID = Intrinsic::x86_aesencwide128kl;
3026 BlockName = "aesencwide128kl";
3027 break;
3028 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3029 IID = Intrinsic::x86_aesdecwide128kl;
3030 BlockName = "aesdecwide128kl";
3031 break;
3032 case X86::BI__builtin_ia32_aesencwide256kl_u8:
3033 IID = Intrinsic::x86_aesencwide256kl;
3034 BlockName = "aesencwide256kl";
3035 break;
3036 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
3037 IID = Intrinsic::x86_aesdecwide256kl;
3038 BlockName = "aesdecwide256kl";
3039 break;
3040 }
3041
3042 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
3043 Value *InOps[9];
3044 InOps[0] = Ops[2];
3045 for (int i = 0; i != 8; ++i) {
3046 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
3047 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
3048 }
3049
3050 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
3051
3052 BasicBlock *NoError =
3053 createBasicBlock(BlockName + "_no_error", this->CurFn);
3054 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3055 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3056
3057 Value *Ret = Builder.CreateExtractValue(Call, 0);
3058 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3059 Builder.CreateCondBr(Succ, NoError, Error);
3060
3061 Builder.SetInsertPoint(NoError);
3062 for (int i = 0; i != 8; ++i) {
3063 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3064 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
3065 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
3066 }
3067 Builder.CreateBr(End);
3068
3069 Builder.SetInsertPoint(Error);
3070 for (int i = 0; i != 8; ++i) {
3071 Value *Out = Builder.CreateExtractValue(Call, i + 1);
3072 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3073 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
3074 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
3075 }
3076 Builder.CreateBr(End);
3077
3078 Builder.SetInsertPoint(End);
3079 return Builder.CreateExtractValue(Call, 0);
3080 }
3081 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
3082 IsConjFMA = true;
3083 [[fallthrough]];
3084 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
3085 Intrinsic::ID IID = IsConjFMA
3086 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
3087 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
3088 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3089 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
3090 }
3091 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
3092 IsConjFMA = true;
3093 [[fallthrough]];
3094 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
3095 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3096 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3097 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3098 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
3099 return EmitX86ScalarSelect(*this, And, Call, Ops[0]);
3100 }
3101 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
3102 IsConjFMA = true;
3103 [[fallthrough]];
3104 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
3105 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3106 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3107 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3108 static constexpr int Mask[] = {0, 5, 6, 7};
3109 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
3110 }
3111 case X86::BI__builtin_ia32_prefetchi:
3112 return Builder.CreateCall(
3113 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
3114 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
3115 llvm::ConstantInt::get(Int32Ty, 0)});
3116 }
3117}