clang: lib/CodeGen/TargetBuiltins/AMDGPU.cpp Source File (original) (raw)
172 llvm::Type *ValueType, bool isExecHi) {
175
177 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
178
179 if (isExecHi) {
180 Value *Rt2 = Builder.CreateLShr(Call, 32);
181 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
182 return Rt2;
183 }
184
186}
189 llvm::Value *RsrcPtr) {
191 auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
192
193 if (RsrcPtr->getType() == VecTy)
194 return RsrcPtr;
195
196 if (RsrcPtr->getType()->isIntegerTy(32)) {
197 llvm::PointerType *VecPtrTy =
199 llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
200 return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
201 }
202
203 if (RsrcPtr->getType()->isPointerTy()) {
204 auto *VecPtrTy = llvm::PointerType::get(
205 CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
206 llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
207 return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
208 }
209
211 if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
212 return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
213
214 llvm::report_fatal_error("Unexpected texture resource argument form");
215}
220 unsigned IntrinsicID, bool IsImageStore) {
221 auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
223 for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
225 if (ArgTy == TexQT) {
226 return I;
227 }
228
230 return I;
231 }
232 }
233
234 return ~0U;
235 };
236
238 unsigned RsrcIndex = findTextureDescIndex(E);
239
240 if (RsrcIndex == ~0U) {
241 llvm::report_fatal_error("Invalid argument count for image builtin");
242 }
243
244 for (unsigned I = 0; I < E->getNumArgs(); ++I) {
246 if (I == RsrcIndex)
248 Args.push_back(V);
249 }
250
252 llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
254}
259 unsigned IntrinsicID) {
262
263 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
264 return CGF.Builder.CreateCall(F, {Src0, Src1});
265}
268 if (AMDGCNScope == "agent")
269 return "device";
270 if (AMDGCNScope == "wavefront")
271 return "subgroup";
272 return AMDGCNScope;
273}
282 llvm::AtomicOrdering &AO,
283 llvm::SyncScope::ID &SSID) {
285
286
287 assert(llvm::isValidAtomicOrderingCABI(ord));
288 switch (static_castllvm::AtomicOrderingCABI\(ord)) {
289 case llvm::AtomicOrderingCABI::acquire:
290 case llvm::AtomicOrderingCABI::consume:
291 AO = llvm::AtomicOrdering::Acquire;
292 break;
293 case llvm::AtomicOrderingCABI::release:
294 AO = llvm::AtomicOrdering::Release;
295 break;
296 case llvm::AtomicOrderingCABI::acq_rel:
297 AO = llvm::AtomicOrdering::AcquireRelease;
298 break;
299 case llvm::AtomicOrderingCABI::seq_cst:
300 AO = llvm::AtomicOrdering::SequentiallyConsistent;
301 break;
302 case llvm::AtomicOrderingCABI::relaxed:
303 AO = llvm::AtomicOrdering::Monotonic;
304 break;
305 }
306
307
308 StringRef scp;
309 if (llvm::getConstantStringInfo(Scope, scp)) {
310 if (getTarget().getTriple().isSPIRV())
313 return;
314 }
315
316
317 const char *SSN = nullptr;
319 switch (scope) {
321 SSID = llvm::SyncScope::System;
322 break;
325 break;
327 SSN = "workgroup";
328 break;
331 break;
334 break;
336 SSID = llvm::SyncScope::SingleThread;
337 break;
338 default:
339 SSID = llvm::SyncScope::System;
340 break;
341 }
342 if (SSN)
344}
348 constexpr const char *Tag = "amdgpu-synchronize-as";
349
350 LLVMContext &Ctx = Inst->getContext();
352 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
354 StringRef AS;
355 if (llvm::getConstantStringInfo(V, AS)) {
356 MMRAs.push_back({Tag, AS});
357
358 continue;
359 }
361 "expected an address space name as a string literal");
362 }
363
364 llvm::sort(MMRAs);
365 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
366 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
367}
370 switch (BuiltinID) {
371 default:
372 llvm_unreachable("Unknown BuiltinID for wave reduction");
373 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
374 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
375 return Intrinsic::amdgcn_wave_reduce_add;
376 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
377 return Intrinsic::amdgcn_wave_reduce_fadd;
378 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
379 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
380 return Intrinsic::amdgcn_wave_reduce_sub;
381 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
382 return Intrinsic::amdgcn_wave_reduce_fsub;
383 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
384 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
385 return Intrinsic::amdgcn_wave_reduce_min;
386 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
387 return Intrinsic::amdgcn_wave_reduce_fmin;
388 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
389 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
390 return Intrinsic::amdgcn_wave_reduce_umin;
391 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
392 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
393 return Intrinsic::amdgcn_wave_reduce_max;
394 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
395 return Intrinsic::amdgcn_wave_reduce_fmax;
396 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
397 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
398 return Intrinsic::amdgcn_wave_reduce_umax;
399 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
400 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
401 return Intrinsic::amdgcn_wave_reduce_and;
402 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
403 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
404 return Intrinsic::amdgcn_wave_reduce_or;
405 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
406 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
407 return Intrinsic::amdgcn_wave_reduce_xor;
408 }
409}
413 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
414 llvm::SyncScope::ID SSID;
415 switch (BuiltinID) {
416 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
417 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
418 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
419 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
420 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
421 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
422 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
423 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
424 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
425 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
426 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
427 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
428 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
429 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
430 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
431 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
432 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
433 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
434 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
435 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
436 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
437 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
441 llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
442 return Builder.CreateCall(F, {Value, Strategy});
443 }
444 case AMDGPU::BI__builtin_amdgcn_div_scale:
445 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
446
447
448
450
454
455 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
456 X->getType());
457
458 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
459
460 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
461 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
462
463 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
464
465 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
466 Builder.CreateStore(FlagExt, FlagOutPtr);
468 }
469 case AMDGPU::BI__builtin_amdgcn_div_fmas:
470 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
475
476 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
477 Src0->getType());
478 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
479 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
480 }
481
482 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
484 Intrinsic::amdgcn_ds_swizzle);
485 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
486 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
487 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
489
490
491 unsigned ICEArguments = 0;
496 unsigned Size = DataTy->getPrimitiveSizeInBits();
497 llvm::Type *IntTy =
498 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
500 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
501 ? Intrinsic::amdgcn_mov_dpp8
502 : Intrinsic::amdgcn_update_dpp,
506 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
507 if (InsertOld)
508 Args.push_back(llvm::PoisonValue::get(IntTy));
509 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
511 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
512 Size < 32) {
513 if (!DataTy->isIntegerTy())
515 V, llvm::IntegerType::get(Builder.getContext(), Size));
517 }
518 llvm::Type *ExpTy =
519 F->getFunctionType()->getFunctionParamType(I + InsertOld);
520 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
521 }
523 if (Size < 32 && !DataTy->isIntegerTy())
525 V, llvm::IntegerType::get(Builder.getContext(), Size));
526 return Builder.CreateTruncOrBitCast(V, DataTy);
527 }
528 case AMDGPU::BI__builtin_amdgcn_permlane16:
529 case AMDGPU::BI__builtin_amdgcn_permlanex16:
531 *this, E,
532 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
533 ? Intrinsic::amdgcn_permlane16
534 : Intrinsic::amdgcn_permlanex16);
535 case AMDGPU::BI__builtin_amdgcn_permlane64:
537 Intrinsic::amdgcn_permlane64);
538 case AMDGPU::BI__builtin_amdgcn_readlane:
540 Intrinsic::amdgcn_readlane);
541 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
543 Intrinsic::amdgcn_readfirstlane);
544 case AMDGPU::BI__builtin_amdgcn_div_fixup:
545 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
546 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
548 Intrinsic::amdgcn_div_fixup);
549 case AMDGPU::BI__builtin_amdgcn_trig_preop:
550 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
551 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
552 case AMDGPU::BI__builtin_amdgcn_rcp:
553 case AMDGPU::BI__builtin_amdgcn_rcpf:
554 case AMDGPU::BI__builtin_amdgcn_rcph:
555 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:
557 case AMDGPU::BI__builtin_amdgcn_sqrt:
558 case AMDGPU::BI__builtin_amdgcn_sqrtf:
559 case AMDGPU::BI__builtin_amdgcn_sqrth:
560 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:
562 Intrinsic::amdgcn_sqrt);
563 case AMDGPU::BI__builtin_amdgcn_rsq:
564 case AMDGPU::BI__builtin_amdgcn_rsqf:
565 case AMDGPU::BI__builtin_amdgcn_rsqh:
566 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
568 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
569 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
571 Intrinsic::amdgcn_rsq_clamp);
572 case AMDGPU::BI__builtin_amdgcn_sinf:
573 case AMDGPU::BI__builtin_amdgcn_sinh:
574 case AMDGPU::BI__builtin_amdgcn_sin_bf16:
576 case AMDGPU::BI__builtin_amdgcn_cosf:
577 case AMDGPU::BI__builtin_amdgcn_cosh:
578 case AMDGPU::BI__builtin_amdgcn_cos_bf16:
580 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
581 return EmitAMDGPUDispatchPtr(*this, E);
582 case AMDGPU::BI__builtin_amdgcn_logf:
583 case AMDGPU::BI__builtin_amdgcn_log_bf16:
585 case AMDGPU::BI__builtin_amdgcn_exp2f:
586 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:
588 Intrinsic::amdgcn_exp2);
589 case AMDGPU::BI__builtin_amdgcn_log_clampf:
591 Intrinsic::amdgcn_log_clamp);
592 case AMDGPU::BI__builtin_amdgcn_ldexp:
593 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
596 llvm::Function *F =
597 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
598 return Builder.CreateCall(F, {Src0, Src1});
599 }
600 case AMDGPU::BI__builtin_amdgcn_ldexph: {
601
602
605 llvm::Function *F =
606 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
608 }
609 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
610 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
611 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
613 Intrinsic::amdgcn_frexp_mant);
614 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
615 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
617 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
619 return Builder.CreateCall(F, Src0);
620 }
621 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
623 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
625 return Builder.CreateCall(F, Src0);
626 }
627 case AMDGPU::BI__builtin_amdgcn_fract:
628 case AMDGPU::BI__builtin_amdgcn_fractf:
629 case AMDGPU::BI__builtin_amdgcn_fracth:
631 Intrinsic::amdgcn_fract);
632 case AMDGPU::BI__builtin_amdgcn_lerp:
634 Intrinsic::amdgcn_lerp);
635 case AMDGPU::BI__builtin_amdgcn_ubfe:
637 Intrinsic::amdgcn_ubfe);
638 case AMDGPU::BI__builtin_amdgcn_sbfe:
640 Intrinsic::amdgcn_sbfe);
641 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
642 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
645 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
646 return Builder.CreateCall(F, {Src});
647 }
648 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
649 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
652 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});
653 return Builder.CreateCall(F, {Src});
654 }
655 case AMDGPU::BI__builtin_amdgcn_tanhf:
656 case AMDGPU::BI__builtin_amdgcn_tanhh:
657 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:
659 Intrinsic::amdgcn_tanh);
660 case AMDGPU::BI__builtin_amdgcn_uicmp:
661 case AMDGPU::BI__builtin_amdgcn_uicmpl:
662 case AMDGPU::BI__builtin_amdgcn_sicmp:
663 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
667
668
669 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
670 { Builder.getInt64Ty(), Src0->getType() });
671 return Builder.CreateCall(F, { Src0, Src1, Src2 });
672 }
673 case AMDGPU::BI__builtin_amdgcn_fcmp:
674 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
678
679
680 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
681 { Builder.getInt64Ty(), Src0->getType() });
682 return Builder.CreateCall(F, { Src0, Src1, Src2 });
683 }
684 case AMDGPU::BI__builtin_amdgcn_class:
685 case AMDGPU::BI__builtin_amdgcn_classf:
686 case AMDGPU::BI__builtin_amdgcn_classh:
688 case AMDGPU::BI__builtin_amdgcn_fmed3f:
689 case AMDGPU::BI__builtin_amdgcn_fmed3h:
691 Intrinsic::amdgcn_fmed3);
692 case AMDGPU::BI__builtin_amdgcn_ds_append:
693 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
694 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
695 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
698 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
699 }
700 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
701 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
702 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
703 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
704 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
705 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
706 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
707 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
708 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
709 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
710 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
711 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
712 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
713 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
714 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
715 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
716 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
717 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
718 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
719 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
720 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
721 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
722 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
723 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
724 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
725 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
726 Intrinsic::ID IID;
727 switch (BuiltinID) {
728 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
729 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
730 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:
731 IID = Intrinsic::amdgcn_global_load_tr_b64;
732 break;
733 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
734 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
735 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
736 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
737 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
738 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
739 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:
740 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:
741 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:
742 IID = Intrinsic::amdgcn_global_load_tr_b128;
743 break;
744 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:
745 IID = Intrinsic::amdgcn_global_load_tr4_b64;
746 break;
747 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:
748 IID = Intrinsic::amdgcn_global_load_tr6_b96;
749 break;
750 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:
751 IID = Intrinsic::amdgcn_ds_load_tr4_b64;
752 break;
753 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:
754 IID = Intrinsic::amdgcn_ds_load_tr6_b96;
755 break;
756 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:
757 IID = Intrinsic::amdgcn_ds_load_tr8_b64;
758 break;
759 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:
760 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:
761 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:
762 IID = Intrinsic::amdgcn_ds_load_tr16_b128;
763 break;
764 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
765 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
766 break;
767 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
768 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
769 break;
770 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
771 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
772 break;
773 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
774 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
775 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
776 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
777 break;
778 }
781 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
783 }
784 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
785 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
786 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
787 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
788 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
789 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
790
791 Intrinsic::ID IID;
792 switch (BuiltinID) {
793 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
794 IID = Intrinsic::amdgcn_global_load_monitor_b32;
795 break;
796 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
797 IID = Intrinsic::amdgcn_global_load_monitor_b64;
798 break;
799 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
800 IID = Intrinsic::amdgcn_global_load_monitor_b128;
801 break;
802 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
803 IID = Intrinsic::amdgcn_flat_load_monitor_b32;
804 break;
805 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
806 IID = Intrinsic::amdgcn_flat_load_monitor_b64;
807 break;
808 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
809 IID = Intrinsic::amdgcn_flat_load_monitor_b128;
810 break;
811 }
812
816 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
817 return Builder.CreateCall(F, {Addr, Val});
818 }
819 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
820 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
821 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {
822 Intrinsic::ID IID;
823 switch (BuiltinID) {
824 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:
825 IID = Intrinsic::amdgcn_cluster_load_b32;
826 break;
827 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:
828 IID = Intrinsic::amdgcn_cluster_load_b64;
829 break;
830 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:
831 IID = Intrinsic::amdgcn_cluster_load_b128;
832 break;
833 }
835 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
838 return Builder.CreateCall(F, {Args});
839 }
840 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
841
843 Intrinsic::amdgcn_load_to_lds);
844 }
845 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
846 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
847 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
848 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
849 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
850 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
851 Intrinsic::ID IID;
852 switch (BuiltinID) {
853 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
854 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
855 break;
856 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
857 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
858 break;
859 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
860 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
861 break;
862 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
863 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
864 break;
865 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
866 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
867 break;
868 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
869 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
870 break;
871 }
872
873 LLVMContext &Ctx = CGM.getLLVMContext();
875
876 const unsigned ScopeArg = E->getNumArgs() - 1;
877 for (unsigned i = 0; i != ScopeArg; ++i)
880 ->getString();
881 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
882 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
883
884
885 llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
886 return Builder.CreateCall(F, {Args});
887 }
888 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
889 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
891 return Builder.CreateCall(F);
892 }
893 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
894 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
897 return Builder.CreateCall(F, {Env});
898 }
899 case AMDGPU::BI__builtin_amdgcn_read_exec:
901 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
903 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
905 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
906 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
907 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
908 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
915
916
917
918 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
919 {0, 1, 2});
920 RayDir =
921 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});
922 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
923 {0, 1, 2});
924
925 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
926 {NodePtr->getType(), RayDir->getType()});
927 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
928 RayInverseDir, TextureDescr});
929 }
930 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
931 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {
932 Intrinsic::ID IID;
933 switch (BuiltinID) {
934 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:
935 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;
936 break;
937 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:
938 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;
939 break;
940 }
948
951
952 llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);
953
954 llvm::CallInst *CI = Builder.CreateCall(
955 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,
956 Offset, TextureDescr});
957
958 llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);
959 llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);
960 llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);
961
962 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);
963 Builder.CreateStore(RetRayDir, RetRayDirPtr);
964
965 return RetVData;
966 }
967
968 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
969 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
970 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
971 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {
972 Intrinsic::ID IID;
973 switch (BuiltinID) {
974 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:
975 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;
976 break;
977 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:
978 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;
979 break;
980 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:
981 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;
982 break;
983 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:
984 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;
985 break;
986 }
987
989 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
991
997 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
998 (uint64_t)0);
999
1000
1001 if (A->getType()->getPrimitiveSizeInBits() <
1002 RetTy->getScalarType()->getPrimitiveSizeInBits())
1003 A = Builder.CreateZExt(A, RetTy->getScalarType());
1004
1005 return Builder.CreateInsertElement(I0, A, 1);
1006 }
1007 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
1008 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
1010 *this, E, Intrinsic::amdgcn_image_load_1d, false);
1011 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
1012 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
1014 *this, E, Intrinsic::amdgcn_image_load_1darray, false);
1015 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
1016 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
1017 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
1019 *this, E, Intrinsic::amdgcn_image_load_2d, false);
1020 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
1021 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
1022 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
1024 *this, E, Intrinsic::amdgcn_image_load_2darray, false);
1025 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
1026 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
1028 *this, E, Intrinsic::amdgcn_image_load_3d, false);
1029 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
1030 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
1032 *this, E, Intrinsic::amdgcn_image_load_cube, false);
1033 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
1034 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
1036 *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
1037 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
1038 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
1040 *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
1041 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
1042 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
1043 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
1045 *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
1046 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
1047 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
1048 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
1050 *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
1051 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
1052 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
1054 *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
1055 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
1056 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
1058 *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
1059 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
1060 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
1062 *this, E, Intrinsic::amdgcn_image_store_1d, true);
1063 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
1064 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
1066 *this, E, Intrinsic::amdgcn_image_store_1darray, true);
1067 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
1068 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
1069 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
1071 *this, E, Intrinsic::amdgcn_image_store_2d, true);
1072 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
1073 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
1074 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
1076 *this, E, Intrinsic::amdgcn_image_store_2darray, true);
1077 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
1078 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
1080 *this, E, Intrinsic::amdgcn_image_store_3d, true);
1081 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
1082 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
1084 *this, E, Intrinsic::amdgcn_image_store_cube, true);
1085 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
1086 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
1088 *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
1089 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
1090 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
1092 *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
1093 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
1094 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
1095 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
1097 *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
1098 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
1099 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
1100 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
1102 *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
1103 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
1104 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
1106 *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
1107 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
1108 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
1110 *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
1111 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
1112 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
1114 *this, E, Intrinsic::amdgcn_image_sample_1d, false);
1115 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
1116 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
1118 *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
1119 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
1120 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
1121 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
1123 *this, E, Intrinsic::amdgcn_image_sample_2d, false);
1124 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
1125 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
1126 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
1128 *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
1129 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
1130 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
1132 *this, E, Intrinsic::amdgcn_image_sample_3d, false);
1133 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
1134 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
1136 *this, E, Intrinsic::amdgcn_image_sample_cube, false);
1137 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
1138 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
1140 *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
1141 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
1142 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
1144 *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
1145 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
1146 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
1148 *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
1149 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
1150 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
1151 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
1153 *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
1154 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
1155 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
1156 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
1158 *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
1159 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
1160 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
1161 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
1163 *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
1164 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
1165 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
1167 *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
1168 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
1169 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
1171 *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
1172 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
1173 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
1175 *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
1176 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
1177 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
1179 *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
1180 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
1181 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
1183 *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
1184 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
1185 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
1187 *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
1188 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
1189 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
1191 *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
1192 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
1193 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
1195 *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
1196 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
1197 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
1198 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
1200 *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
1201 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
1202 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
1203 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
1205 *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
1206 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
1207 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
1208 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
1210 *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
1211 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
1213 *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
1214 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1215 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1216 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
1218 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1219 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
1220 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
1221 {VT, VT});
1222
1224 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
1226 return Builder.CreateCall(F, Args);
1227 }
1228 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1229 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1230 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1231 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1232 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1233 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1234 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1235 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1236 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1237 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1238 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1239 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1240 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1241 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1242 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1243 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1244 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1245 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1246 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1247 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1248 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1249 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1250 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1251 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1252 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1253 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1254 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1255 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1256 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1257 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1258 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1259 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1260 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1261 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1262 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1263 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1264 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1265 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1266 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1267 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1268 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1269 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1270 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1271 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1272 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1273 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1274 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1275 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1276 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1277 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1278 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1279 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1280 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1281 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1282 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1283 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1284 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1285 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1286 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1287 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1288
1289 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1290 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1291 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1292 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1293 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1294 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1295 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1296 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1297 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1298 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1299 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1300 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1301 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1302 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1303 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1304 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1305 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1306 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1307 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1308 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1309 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1310 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1311 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1312 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1313 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1314 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1315 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1316 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1317 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1318 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1319 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1320 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1321 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1322 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1323 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1324 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1325 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1326 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1327 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1328 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1329 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1330 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1331 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
1332
1333
1334
1335
1336
1337
1338
1339
1341
1342
1343
1344 bool AppendFalseForOpselArg = false;
1345 unsigned BuiltinWMMAOp;
1346
1347 bool NeedReturnType = false;
1348
1349 switch (BuiltinID) {
1350 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
1351 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1352 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1353 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
1354 ArgsForMatchingMatrixTypes = {2, 0};
1355 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1356 break;
1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
1361 ArgsForMatchingMatrixTypes = {2, 0};
1362 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1363 break;
1364 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
1365 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
1366 AppendFalseForOpselArg = true;
1367 [[fallthrough]];
1368 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1369 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
1370 ArgsForMatchingMatrixTypes = {2, 0};
1371 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1372 break;
1373 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
1374 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
1375 AppendFalseForOpselArg = true;
1376 [[fallthrough]];
1377 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1378 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
1379 ArgsForMatchingMatrixTypes = {2, 0};
1380 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1381 break;
1382 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1383 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
1384 ArgsForMatchingMatrixTypes = {2, 0};
1385 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1386 break;
1387 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1388 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
1389 ArgsForMatchingMatrixTypes = {2, 0};
1390 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1391 break;
1392 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1393 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1394 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1395 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
1396 ArgsForMatchingMatrixTypes = {4, 1};
1397 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1398 break;
1399 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1400 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1401 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1402 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
1403 ArgsForMatchingMatrixTypes = {4, 1};
1404 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1405 break;
1406 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1407 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
1408 ArgsForMatchingMatrixTypes = {2, 0};
1409 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1410 break;
1411 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1412 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
1413 ArgsForMatchingMatrixTypes = {2, 0};
1414 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1415 break;
1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1417 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
1418 ArgsForMatchingMatrixTypes = {2, 0};
1419 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1420 break;
1421 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1422 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
1423 ArgsForMatchingMatrixTypes = {2, 0};
1424 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1425 break;
1426 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1427 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
1428 ArgsForMatchingMatrixTypes = {4, 1};
1429 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1430 break;
1431 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
1432 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
1433 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1434 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
1435 break;
1436 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
1437 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
1438 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1439 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
1440 break;
1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
1442 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
1443 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1444 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
1445 break;
1446 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
1447 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
1448 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1449 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
1450 break;
1451 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
1452 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
1453 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1454 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
1455 break;
1456 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
1457 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
1458 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1459 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
1460 break;
1461 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
1462 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
1463 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1464 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
1465 break;
1466 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
1467 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
1468 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1469 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
1470 break;
1471 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
1472 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
1473 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1474 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
1475 break;
1476 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
1477 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
1478 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1479 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
1480 break;
1481 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
1482 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
1483 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1484 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
1485 break;
1486
1487 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1488 ArgsForMatchingMatrixTypes = {5, 1};
1489 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1490 break;
1491 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1492 ArgsForMatchingMatrixTypes = {5, 1};
1493 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1494 break;
1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1496 ArgsForMatchingMatrixTypes = {5, 1};
1497 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1498 break;
1499 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1500 ArgsForMatchingMatrixTypes = {5, 1};
1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1502 break;
1503 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1504 ArgsForMatchingMatrixTypes = {5, 1};
1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1506 break;
1507 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1508 NeedReturnType = true;
1509 ArgsForMatchingMatrixTypes = {1, 5};
1510 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1511 break;
1512 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1513 ArgsForMatchingMatrixTypes = {3, 0};
1514 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1515 break;
1516 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1517 ArgsForMatchingMatrixTypes = {3, 0};
1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1519 break;
1520 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1521 ArgsForMatchingMatrixTypes = {3, 0};
1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1523 break;
1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1525 ArgsForMatchingMatrixTypes = {3, 0};
1526 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1527 break;
1528 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1529 ArgsForMatchingMatrixTypes = {3, 0};
1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1531 break;
1532 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1533 ArgsForMatchingMatrixTypes = {3, 0};
1534 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1535 break;
1536 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1537 ArgsForMatchingMatrixTypes = {3, 0};
1538 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1539 break;
1540 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1541 ArgsForMatchingMatrixTypes = {3, 0};
1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1543 break;
1544 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1545 ArgsForMatchingMatrixTypes = {3, 0};
1546 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1547 break;
1548 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1549 ArgsForMatchingMatrixTypes = {3, 0};
1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1551 break;
1552 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1553 ArgsForMatchingMatrixTypes = {3, 0};
1554 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1555 break;
1556 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1557 ArgsForMatchingMatrixTypes = {3, 0};
1558 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1559 break;
1560 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1561 ArgsForMatchingMatrixTypes = {3, 0};
1562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1563 break;
1564 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1565 ArgsForMatchingMatrixTypes = {3, 0};
1566 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1567 break;
1568 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1569 ArgsForMatchingMatrixTypes = {3, 0};
1570 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1571 break;
1572 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1573 ArgsForMatchingMatrixTypes = {3, 0};
1574 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1575 break;
1576 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1577 ArgsForMatchingMatrixTypes = {4, 1};
1578 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1579 break;
1580 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
1581 ArgsForMatchingMatrixTypes = {5, 1, 3};
1582 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
1583 break;
1584 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1585 ArgsForMatchingMatrixTypes = {5, 1, 3};
1586 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;
1587 break;
1588 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
1589 ArgsForMatchingMatrixTypes = {5, 1, 3};
1590 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;
1591 break;
1592 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1593 ArgsForMatchingMatrixTypes = {3, 0, 1};
1594 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1595 break;
1596 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:
1597 ArgsForMatchingMatrixTypes = {3, 0, 1};
1598 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;
1599 break;
1600 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:
1601 ArgsForMatchingMatrixTypes = {3, 0, 1};
1602 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;
1603 break;
1604 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1605 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1606 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1607 break;
1608 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1609 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1610 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1611 break;
1612 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1613 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1614 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1615 break;
1616 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1617 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1618 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1619 break;
1620 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1621 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1622 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1623 break;
1624 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1625 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1626 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1627 break;
1628 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1629 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1630 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1631 break;
1632 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1633 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1634 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1635 break;
1636 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1637 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1638 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1639 break;
1640 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1641 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1642 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1643 break;
1644 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1645 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1646 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1647 break;
1648 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1649 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1650 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1651 break;
1652 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1653 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
1654 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1655 break;
1656 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1657 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
1658 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1659 break;
1660 }
1661
1663 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
1665 if (AppendFalseForOpselArg)
1666 Args.push_back(Builder.getFalse());
1667
1669 if (NeedReturnType)
1671 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
1672 ArgTypes.push_back(Args[ArgIdx]->getType());
1673
1674 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
1675 return Builder.CreateCall(F, Args);
1676 }
1677
1678 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
1679 return EmitAMDGPUWorkGroupSize(*this, 0);
1680 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
1681 return EmitAMDGPUWorkGroupSize(*this, 1);
1682 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
1683 return EmitAMDGPUWorkGroupSize(*this, 2);
1684
1685
1686 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
1687 return EmitAMDGPUGridSize(*this, 0);
1688 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
1689 return EmitAMDGPUGridSize(*this, 1);
1690 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
1691 return EmitAMDGPUGridSize(*this, 2);
1692
1693
1694 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
1695 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
1697 Intrinsic::r600_recipsqrt_ieee);
1698 case AMDGPU::BI__builtin_amdgcn_alignbit: {
1702 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1703 return Builder.CreateCall(F, { Src0, Src1, Src2 });
1704 }
1705 case AMDGPU::BI__builtin_amdgcn_fence: {
1708 FenceInst *Fence = Builder.CreateFence(AO, SSID);
1711 return Fence;
1712 }
1713 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1714 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1715 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1716 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1717 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1718 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1719 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1720 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1721 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1722 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1723 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1724 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1725 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1726 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1727 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1728 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1729 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1730 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1731 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1732 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1733 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1734 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1735 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1736 llvm::AtomicRMWInst::BinOp BinOp;
1737 switch (BuiltinID) {
1738 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1739 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
1740 BinOp = llvm::AtomicRMWInst::UIncWrap;
1741 break;
1742 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
1743 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
1744 BinOp = llvm::AtomicRMWInst::UDecWrap;
1745 break;
1746 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
1747 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
1748 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
1749 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
1750 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
1751 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1752 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1753 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1754 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1755 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1756 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1757 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
1758 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1759 BinOp = llvm::AtomicRMWInst::FAdd;
1760 break;
1761 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
1762 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1763 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1764 BinOp = llvm::AtomicRMWInst::FMin;
1765 break;
1766 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1767 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1768 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
1769 BinOp = llvm::AtomicRMWInst::FMax;
1770 break;
1771 }
1772
1775 llvm::Type *OrigTy = Val->getType();
1777
1778 bool Volatile;
1779
1780 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
1781 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
1782 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
1783
1784 Volatile =
1786 } else {
1787
1788 Volatile =
1790 }
1791
1793
1796 } else {
1797
1798
1799
1800
1801
1802
1803 if (getTarget().getTriple().isSPIRV())
1804 SSID = getLLVMContext().getOrInsertSyncScopeID("device");
1805 else
1806 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
1807 AO = AtomicOrdering::Monotonic;
1808
1809
1810 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
1811 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
1812 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1813 llvm::Type *V2BF16Ty = FixedVectorType::get(
1814 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1815 Val = Builder.CreateBitCast(Val, V2BF16Ty);
1816 }
1817 }
1818
1819 llvm::AtomicRMWInst *RMW =
1820 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
1821 if (Volatile)
1822 RMW->setVolatile(true);
1823
1824 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
1825 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
1826
1827
1828 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
1829 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
1830
1831
1832
1833 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
1834 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
1835 }
1836
1837 return Builder.CreateBitCast(RMW, OrigTy);
1838 }
1839 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
1840 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
1843
1845 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
1846 return Builder.CreateCall(F, {Arg});
1847 }
1848 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
1849 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
1850
1851
1857 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
1858 ? Intrinsic::amdgcn_permlane16_swap
1859 : Intrinsic::amdgcn_permlane32_swap);
1860 llvm::CallInst *Call =
1861 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
1862
1863 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
1864 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
1865
1867
1868 llvm::Value *Insert0 = Builder.CreateInsertElement(
1869 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
1870 llvm::Value *AsVector =
1871 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
1872 return AsVector;
1873 }
1874 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
1875 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
1877 Intrinsic::amdgcn_bitop3);
1878 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {
1879
1880
1881
1883 for (unsigned I = 0; I < 4; ++I)
1885 llvm::PointerType *RetTy = llvm::PointerType::get(
1886 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);
1887 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,
1888 {RetTy, Args[0]->getType()});
1889 return Builder.CreateCall(F, Args);
1890 }
1891 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
1892 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
1893 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
1894 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
1895 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
1896 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1898 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
1899 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1900 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1901 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1902 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1903 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1904 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
1905 llvm::Type *RetTy = nullptr;
1906 switch (BuiltinID) {
1907 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
1909 break;
1910 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
1912 break;
1913 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
1915 break;
1916 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
1917 RetTy = llvm::FixedVectorType::get(Int32Ty, 2);
1918 break;
1919 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
1920 RetTy = llvm::FixedVectorType::get(Int32Ty, 3);
1921 break;
1922 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
1923 RetTy = llvm::FixedVectorType::get(Int32Ty, 4);
1924 break;
1925 }
1927 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
1928 return Builder.CreateCall(
1931 }
1932 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1934 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
1935 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1936 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1938 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
1939 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:
1940 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:
1942 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);
1943 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1944 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1946 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
1947 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
1949 *this, E, Intrinsic::amdgcn_s_prefetch_data);
1950 case Builtin::BIlogbf:
1951 case Builtin::BI__builtin_logbf: {
1954 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
1955 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
1956 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
1958 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
1963 Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
1964 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1965 Value *FCmpOEQ =
1966 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
1968 FCmpOEQ,
1969 ConstantFP::getInfinity(Builder.getFloatTy(), true), Sel1);
1970 return Sel2;
1971 }
1972 case Builtin::BIlogb:
1973 case Builtin::BI__builtin_logb: {
1976 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
1977 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
1978 Value *Exp = Builder.CreateExtractValue(FrExp, 1);
1980 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
1985 Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
1986 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1987 Value *FCmpOEQ =
1988 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
1990 FCmpOEQ,
1991 ConstantFP::getInfinity(Builder.getDoubleTy(), true),
1992 Sel1);
1993 return Sel2;
1994 }
1995 case Builtin::BIscalbnf:
1996 case Builtin::BI__builtin_scalbnf:
1997 case Builtin::BIscalbn:
1998 case Builtin::BI__builtin_scalbn:
2000 *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
2001 default:
2002 return nullptr;
2003 }
2004}