clang: lib/CodeGen/TargetBuiltins/AMDGPU.cpp Source File

172 llvm::Type *ValueType, bool isExecHi) {

175

177 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});

178

179 if (isExecHi) {

180 Value *Rt2 = Builder.CreateLShr(Call, 32);

181 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);

182 return Rt2;

183 }

184

186}
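// The helper below canonicalizes a texture resource descriptor argument into
// an <8 x i32> vector: an i32 handle is converted to a pointer and loaded, a
// pointer operand is loaded through a bitcast, and any other 256-bit value is
// bitcast in place; anything else is a fatal error.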

189 llvm::Value *RsrcPtr) {

191 auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);

192

193 if (RsrcPtr->getType() == VecTy)

194 return RsrcPtr;

195

196 if (RsrcPtr->getType()->isIntegerTy(32)) {

197 llvm::PointerType *VecPtrTy =

199 llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");

200 return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");

201 }

202

203 if (RsrcPtr->getType()->isPointerTy()) {

204 auto *VecPtrTy = llvm::PointerType::get(

205 CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());

206 llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");

207 return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");

208 }

209

211 if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)

212 return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");

213

214 llvm::report_fatal_error("Unexpected texture resource argument form");

215}
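// The next helper emits an overloaded image builtin: a lambda locates the
// texture-descriptor argument by type, the argument list is rebuilt with that
// operand replaced by its canonical <8 x i32> form (via the helper above),
// and the selected image intrinsic is called. The IsImageStore flag
// presumably distinguishes the void store forms from the value-returning
// loads/samples on the elided return path.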

220 unsigned IntrinsicID, bool IsImageStore) {

221 auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {

223 for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {

225 if (ArgTy == TexQT) {

226 return I;

227 }

228

230 return I;

231 }

232 }

233

234 return ~0U;

235 };

236

238 unsigned RsrcIndex = findTextureDescIndex(E);

239

240 if (RsrcIndex == ~0U) {

241 llvm::report_fatal_error("Invalid argument count for image builtin");

242 }

243

244 for (unsigned I = 0; I < E->getNumArgs(); ++I) {

246 if (I == RsrcIndex)

248 Args.push_back(V);

249 }

250

252 llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);

254}
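// Small helper (apparently the emitFPIntBuiltin referenced further below):
// emit a two-operand intrinsic call, overloading the intrinsic on the type of
// the first source operand.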

259 unsigned IntrinsicID) {

262

263 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());

264 return CGF.Builder.CreateCall(F, {Src0, Src1});

265}
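// Map AMDGCN memory-scope names onto the generic/SPIR-V spellings used by the
// sync-scope handling below: "agent" becomes "device", "wavefront" becomes
// "subgroup", and any other name is passed through unchanged.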

268 if (AMDGCNScope == "agent")

269 return "device";

270 if (AMDGCNScope == "wavefront")

271 return "subgroup";

272 return AMDGCNScope;

273}
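// Decode the memory-order and memory-scope operands of an AMDGCN atomic
// builtin: the C ABI ordering constant is translated into an
// llvm::AtomicOrdering, the scope string is remapped for SPIR-V targets via
// the helper above, and known scope names select a SyncScope ID (unknown
// names fall back to System). The elided tail presumably turns SSN, when set,
// into a named sync scope ID.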

282 llvm::AtomicOrdering &AO,

283 llvm::SyncScope::ID &SSID) {

285

286

287 assert(llvm::isValidAtomicOrderingCABI(ord));

288 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {

289 case llvm::AtomicOrderingCABI::acquire:

290 case llvm::AtomicOrderingCABI::consume:

291 AO = llvm::AtomicOrdering::Acquire;

292 break;

293 case llvm::AtomicOrderingCABI::release:

294 AO = llvm::AtomicOrdering::Release;

295 break;

296 case llvm::AtomicOrderingCABI::acq_rel:

297 AO = llvm::AtomicOrdering::AcquireRelease;

298 break;

299 case llvm::AtomicOrderingCABI::seq_cst:

300 AO = llvm::AtomicOrdering::SequentiallyConsistent;

301 break;

302 case llvm::AtomicOrderingCABI::relaxed:

303 AO = llvm::AtomicOrdering::Monotonic;

304 break;

305 }

306

307

308 StringRef scp;

309 if (llvm::getConstantStringInfo(Scope, scp)) {

310 if (getTarget().getTriple().isSPIRV())

313 return;

314 }

315

316

317 const char *SSN = nullptr;

319 switch (scope) {

321 SSID = llvm::SyncScope::System;

322 break;

325 break;

327 SSN = "workgroup";

328 break;

331 break;

334 break;

336 SSID = llvm::SyncScope::SingleThread;

337 break;

338 default:

339 SSID = llvm::SyncScope::System;

340 break;

341 }

342 if (SSN)

344}
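// Attach "amdgpu-synchronize-as" MMRA metadata to the instruction: every
// argument from index 2 onward must be a string literal naming an address
// space; the collected tag/value pairs are sorted, deduplicated, and stored
// as MD_mmra metadata.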

348 constexpr const char *Tag = "amdgpu-synchronize-as";

349

350 LLVMContext &Ctx = Inst->getContext();

352 for (unsigned K = 2; K < E->getNumArgs(); ++K) {

354 StringRef AS;

355 if (llvm::getConstantStringInfo(V, AS)) {

356 MMRAs.push_back({Tag, AS});

357

358 continue;

359 }

361 "expected an address space name as a string literal");

362 }

363

364 llvm::sort(MMRAs);

365 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());

366 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));

367}
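// Map a wave-reduction builtin ID to the matching llvm.amdgcn.wave.reduce.*
// intrinsic; note that the unsigned min/max builtins select the umin/umax
// forms. Illustrative use (a sketch, not taken from this file):
//   unsigned s = __builtin_amdgcn_wave_reduce_add_u32(v, /*strategy=*/0);
// lowers to a call of llvm.amdgcn.wave.reduce.add overloaded on the value
// type, as emitted in the expansion further below.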

370 switch (BuiltinID) {

371 default:

372 llvm_unreachable("Unknown BuiltinID for wave reduction");

373 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:

374 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:

375 return Intrinsic::amdgcn_wave_reduce_add;

376 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:

377 return Intrinsic::amdgcn_wave_reduce_fadd;

378 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:

379 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:

380 return Intrinsic::amdgcn_wave_reduce_sub;

381 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:

382 return Intrinsic::amdgcn_wave_reduce_fsub;

383 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:

384 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:

385 return Intrinsic::amdgcn_wave_reduce_min;

386 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:

387 return Intrinsic::amdgcn_wave_reduce_fmin;

388 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:

389 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:

390 return Intrinsic::amdgcn_wave_reduce_umin;

391 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:

392 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:

393 return Intrinsic::amdgcn_wave_reduce_max;

394 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:

395 return Intrinsic::amdgcn_wave_reduce_fmax;

396 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:

397 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:

398 return Intrinsic::amdgcn_wave_reduce_umax;

399 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:

400 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:

401 return Intrinsic::amdgcn_wave_reduce_and;

402 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:

403 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:

404 return Intrinsic::amdgcn_wave_reduce_or;

405 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:

406 case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:

407 return Intrinsic::amdgcn_wave_reduce_xor;

408 }

409}
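// The large switch that follows (shown here as an excerpt) is the AMDGPU
// builtin-expansion entry point, presumably
// CodeGenFunction::EmitAMDGPUBuiltinExpr: each builtin ID is expanded into a
// call to the corresponding llvm.amdgcn.* intrinsic.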

413 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;

414 llvm::SyncScope::ID SSID;

415 switch (BuiltinID) {

416 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:

417 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:

418 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:

419 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:

420 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:

421 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:

422 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:

423 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:

424 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:

425 case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:

426 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:

427 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:

428 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:

429 case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:

430 case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:

431 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:

432 case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:

433 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:

434 case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:

435 case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:

436 case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:

437 case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {

441 llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});

442 return Builder.CreateCall(F, {Value, Strategy});

443 }
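// div_scale returns a {value, flag} pair: the flag element is zero-extended
// to the type of the out-parameter and stored through it, and the scaled
// value is returned (the return itself falls on an elided line).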

444 case AMDGPU::BI__builtin_amdgcn_div_scale:

445 case AMDGPU::BI__builtin_amdgcn_div_scalef: {

446

447

448

450

454

455 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,

456 X->getType());

457

458 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

459

460 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);

461 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

462

463 llvm::Type *RealFlagType = FlagOutPtr.getElementType();

464

465 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);

466 Builder.CreateStore(FlagExt, FlagOutPtr);

468 }
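// div_fmas takes a bool as its final operand, so the fourth source is first
// compared against zero before calling the intrinsic overloaded on the first
// operand's type.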

469 case AMDGPU::BI__builtin_amdgcn_div_fmas:

470 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {

475

476 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,

477 Src0->getType());

478 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);

479 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});

480 }

481

482 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:

484 Intrinsic::amdgcn_ds_swizzle);
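// DPP builtins: data operands narrower than 32 bits are widened to i32
// (non-integer types are first bitcast to a same-width integer), mov_dpp is
// emitted as update_dpp with a poison "old" operand prepended, and the result
// is truncated or bitcast back to the original type.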

485 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:

486 case AMDGPU::BI__builtin_amdgcn_mov_dpp:

487 case AMDGPU::BI__builtin_amdgcn_update_dpp: {

489

490

491 unsigned ICEArguments = 0;

496 unsigned Size = DataTy->getPrimitiveSizeInBits();

497 llvm::Type *IntTy =

498 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));

500 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8

501 ? Intrinsic::amdgcn_mov_dpp8

502 : Intrinsic::amdgcn_update_dpp,

506 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;

507 if (InsertOld)

508 Args.push_back(llvm::PoisonValue::get(IntTy));

509 for (unsigned I = 0; I != E->getNumArgs(); ++I) {

511 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&

512 Size < 32) {

513 if (!DataTy->isIntegerTy())

515 V, llvm::IntegerType::get(Builder.getContext(), Size));

517 }

518 llvm::Type *ExpTy =

519 F->getFunctionType()->getFunctionParamType(I + InsertOld);

520 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));

521 }

523 if (Size < 32 && !DataTy->isIntegerTy())

525 V, llvm::IntegerType::get(Builder.getContext(), Size));

526 return Builder.CreateTruncOrBitCast(V, DataTy);

527 }

528 case AMDGPU::BI__builtin_amdgcn_permlane16:

529 case AMDGPU::BI__builtin_amdgcn_permlanex16:

531 *this, E,

532 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16

533 ? Intrinsic::amdgcn_permlane16

534 : Intrinsic::amdgcn_permlanex16);

535 case AMDGPU::BI__builtin_amdgcn_permlane64:

537 Intrinsic::amdgcn_permlane64);

538 case AMDGPU::BI__builtin_amdgcn_readlane:

540 Intrinsic::amdgcn_readlane);

541 case AMDGPU::BI__builtin_amdgcn_readfirstlane:

543 Intrinsic::amdgcn_readfirstlane);

544 case AMDGPU::BI__builtin_amdgcn_div_fixup:

545 case AMDGPU::BI__builtin_amdgcn_div_fixupf:

546 case AMDGPU::BI__builtin_amdgcn_div_fixuph:

548 Intrinsic::amdgcn_div_fixup);

549 case AMDGPU::BI__builtin_amdgcn_trig_preop:

550 case AMDGPU::BI__builtin_amdgcn_trig_preopf:

551 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);

552 case AMDGPU::BI__builtin_amdgcn_rcp:

553 case AMDGPU::BI__builtin_amdgcn_rcpf:

554 case AMDGPU::BI__builtin_amdgcn_rcph:

555 case AMDGPU::BI__builtin_amdgcn_rcp_bf16:

557 case AMDGPU::BI__builtin_amdgcn_sqrt:

558 case AMDGPU::BI__builtin_amdgcn_sqrtf:

559 case AMDGPU::BI__builtin_amdgcn_sqrth:

560 case AMDGPU::BI__builtin_amdgcn_sqrt_bf16:

562 Intrinsic::amdgcn_sqrt);

563 case AMDGPU::BI__builtin_amdgcn_rsq:

564 case AMDGPU::BI__builtin_amdgcn_rsqf:

565 case AMDGPU::BI__builtin_amdgcn_rsqh:

566 case AMDGPU::BI__builtin_amdgcn_rsq_bf16:

568 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:

569 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:

571 Intrinsic::amdgcn_rsq_clamp);

572 case AMDGPU::BI__builtin_amdgcn_sinf:

573 case AMDGPU::BI__builtin_amdgcn_sinh:

574 case AMDGPU::BI__builtin_amdgcn_sin_bf16:

576 case AMDGPU::BI__builtin_amdgcn_cosf:

577 case AMDGPU::BI__builtin_amdgcn_cosh:

578 case AMDGPU::BI__builtin_amdgcn_cos_bf16:

580 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:

581 return EmitAMDGPUDispatchPtr(*this, E);

582 case AMDGPU::BI__builtin_amdgcn_logf:

583 case AMDGPU::BI__builtin_amdgcn_log_bf16:

585 case AMDGPU::BI__builtin_amdgcn_exp2f:

586 case AMDGPU::BI__builtin_amdgcn_exp2_bf16:

588 Intrinsic::amdgcn_exp2);

589 case AMDGPU::BI__builtin_amdgcn_log_clampf:

591 Intrinsic::amdgcn_log_clamp);

592 case AMDGPU::BI__builtin_amdgcn_ldexp:

593 case AMDGPU::BI__builtin_amdgcn_ldexpf: {

596 llvm::Function *F =

597 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});

598 return Builder.CreateCall(F, {Src0, Src1});

599 }

600 case AMDGPU::BI__builtin_amdgcn_ldexph: {

601

602

605 llvm::Function *F =

606 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});

608 }

609 case AMDGPU::BI__builtin_amdgcn_frexp_mant:

610 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:

611 case AMDGPU::BI__builtin_amdgcn_frexp_manth:

613 Intrinsic::amdgcn_frexp_mant);

614 case AMDGPU::BI__builtin_amdgcn_frexp_exp:

615 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {

617 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,

619 return Builder.CreateCall(F, Src0);

620 }

621 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {

623 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,

625 return Builder.CreateCall(F, Src0);

626 }

627 case AMDGPU::BI__builtin_amdgcn_fract:

628 case AMDGPU::BI__builtin_amdgcn_fractf:

629 case AMDGPU::BI__builtin_amdgcn_fracth:

631 Intrinsic::amdgcn_fract);

632 case AMDGPU::BI__builtin_amdgcn_lerp:

634 Intrinsic::amdgcn_lerp);

635 case AMDGPU::BI__builtin_amdgcn_ubfe:

637 Intrinsic::amdgcn_ubfe);

638 case AMDGPU::BI__builtin_amdgcn_sbfe:

640 Intrinsic::amdgcn_sbfe);

641 case AMDGPU::BI__builtin_amdgcn_ballot_w32:

642 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {

645 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});

646 return Builder.CreateCall(F, {Src});

647 }

648 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:

649 case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {

652 CGM.getIntrinsic(Intrinsic::amdgcn_inverse_ballot, {Src->getType()});

653 return Builder.CreateCall(F, {Src});

654 }

655 case AMDGPU::BI__builtin_amdgcn_tanhf:

656 case AMDGPU::BI__builtin_amdgcn_tanhh:

657 case AMDGPU::BI__builtin_amdgcn_tanh_bf16:

659 Intrinsic::amdgcn_tanh);

660 case AMDGPU::BI__builtin_amdgcn_uicmp:

661 case AMDGPU::BI__builtin_amdgcn_uicmpl:

662 case AMDGPU::BI__builtin_amdgcn_sicmp:

663 case AMDGPU::BI__builtin_amdgcn_sicmpl: {

667

668

669 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,

670 { Builder.getInt64Ty(), Src0->getType() });

671 return Builder.CreateCall(F, { Src0, Src1, Src2 });

672 }

673 case AMDGPU::BI__builtin_amdgcn_fcmp:

674 case AMDGPU::BI__builtin_amdgcn_fcmpf: {

678

679

680 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,

681 { Builder.getInt64Ty(), Src0->getType() });

682 return Builder.CreateCall(F, { Src0, Src1, Src2 });

683 }

684 case AMDGPU::BI__builtin_amdgcn_class:

685 case AMDGPU::BI__builtin_amdgcn_classf:

686 case AMDGPU::BI__builtin_amdgcn_classh:

688 case AMDGPU::BI__builtin_amdgcn_fmed3f:

689 case AMDGPU::BI__builtin_amdgcn_fmed3h:

691 Intrinsic::amdgcn_fmed3);

692 case AMDGPU::BI__builtin_amdgcn_ds_append:

693 case AMDGPU::BI__builtin_amdgcn_ds_consume: {

694 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?

695 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;

698 return Builder.CreateCall(F, { Src0, Builder.getFalse() });

699 }
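// Transpose-load builtins: the inner switch below picks the matching
// global/LDS (ds) load_tr intrinsic for the builtin ID, and the intrinsic is
// overloaded on the loaded vector type.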

700 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:

701 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:

702 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:

703 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:

704 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:

705 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:

706 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:

707 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:

708 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:

709 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:

710 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:

711 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:

712 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:

713 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:

714 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:

715 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:

716 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:

717 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:

718 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:

719 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:

720 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:

721 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:

722 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:

723 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:

724 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:

725 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {

726 Intrinsic::ID IID;

727 switch (BuiltinID) {

728 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:

729 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:

730 case AMDGPU::BI__builtin_amdgcn_global_load_tr8_b64_v2i32:

731 IID = Intrinsic::amdgcn_global_load_tr_b64;

732 break;

733 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:

734 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:

735 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:

736 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:

737 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:

738 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:

739 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8i16:

740 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8f16:

741 case AMDGPU::BI__builtin_amdgcn_global_load_tr16_b128_v8bf16:

742 IID = Intrinsic::amdgcn_global_load_tr_b128;

743 break;

744 case AMDGPU::BI__builtin_amdgcn_global_load_tr4_b64_v2i32:

745 IID = Intrinsic::amdgcn_global_load_tr4_b64;

746 break;

747 case AMDGPU::BI__builtin_amdgcn_global_load_tr6_b96_v3i32:

748 IID = Intrinsic::amdgcn_global_load_tr6_b96;

749 break;

750 case AMDGPU::BI__builtin_amdgcn_ds_load_tr4_b64_v2i32:

751 IID = Intrinsic::amdgcn_ds_load_tr4_b64;

752 break;

753 case AMDGPU::BI__builtin_amdgcn_ds_load_tr6_b96_v3i32:

754 IID = Intrinsic::amdgcn_ds_load_tr6_b96;

755 break;

756 case AMDGPU::BI__builtin_amdgcn_ds_load_tr8_b64_v2i32:

757 IID = Intrinsic::amdgcn_ds_load_tr8_b64;

758 break;

759 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8i16:

760 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8f16:

761 case AMDGPU::BI__builtin_amdgcn_ds_load_tr16_b128_v8bf16:

762 IID = Intrinsic::amdgcn_ds_load_tr16_b128;

763 break;

764 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:

765 IID = Intrinsic::amdgcn_ds_read_tr4_b64;

766 break;

767 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:

768 IID = Intrinsic::amdgcn_ds_read_tr8_b64;

769 break;

770 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:

771 IID = Intrinsic::amdgcn_ds_read_tr6_b96;

772 break;

773 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:

774 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:

775 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:

776 IID = Intrinsic::amdgcn_ds_read_tr16_b64;

777 break;

778 }

781 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});

783 }

784 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:

785 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:

786 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:

787 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:

788 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:

789 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {

790

791 Intrinsic::ID IID;

792 switch (BuiltinID) {

793 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:

794 IID = Intrinsic::amdgcn_global_load_monitor_b32;

795 break;

796 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:

797 IID = Intrinsic::amdgcn_global_load_monitor_b64;

798 break;

799 case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:

800 IID = Intrinsic::amdgcn_global_load_monitor_b128;

801 break;

802 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:

803 IID = Intrinsic::amdgcn_flat_load_monitor_b32;

804 break;

805 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:

806 IID = Intrinsic::amdgcn_flat_load_monitor_b64;

807 break;

808 case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:

809 IID = Intrinsic::amdgcn_flat_load_monitor_b128;

810 break;

811 }

812

816 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});

817 return Builder.CreateCall(F, {Addr, Val});

818 }

819 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:

820 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:

821 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128: {

822 Intrinsic::ID IID;

823 switch (BuiltinID) {

824 case AMDGPU::BI__builtin_amdgcn_cluster_load_b32:

825 IID = Intrinsic::amdgcn_cluster_load_b32;

826 break;

827 case AMDGPU::BI__builtin_amdgcn_cluster_load_b64:

828 IID = Intrinsic::amdgcn_cluster_load_b64;

829 break;

830 case AMDGPU::BI__builtin_amdgcn_cluster_load_b128:

831 IID = Intrinsic::amdgcn_cluster_load_b128;

832 break;

833 }

835 for (int i = 0, e = E->getNumArgs(); i != e; ++i)

838 return Builder.CreateCall(F, {Args});

839 }

840 case AMDGPU::BI__builtin_amdgcn_load_to_lds: {

841

843 Intrinsic::amdgcn_load_to_lds);

844 }
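// Cooperative atomic load/store builtins: all operands except the last are
// forwarded as-is, while the final argument (apparently a string naming the
// scope) is wrapped in an MDString/MDNode and passed as a metadata operand;
// the intrinsic is overloaded on the pointer argument's type.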

845 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:

846 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:

847 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:

848 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:

849 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:

850 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {

851 Intrinsic::ID IID;

852 switch (BuiltinID) {

853 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:

854 IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;

855 break;

856 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:

857 IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;

858 break;

859 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:

860 IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;

861 break;

862 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:

863 IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;

864 break;

865 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:

866 IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;

867 break;

868 case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:

869 IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;

870 break;

871 }

872

873 LLVMContext &Ctx = CGM.getLLVMContext();

875

876 const unsigned ScopeArg = E->getNumArgs() - 1;

877 for (unsigned i = 0; i != ScopeArg; ++i)

880 ->getString();

881 llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});

882 Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));

883

884

885 llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});

886 return Builder.CreateCall(F, {Args});

887 }

888 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {

889 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,

891 return Builder.CreateCall(F);

892 }

893 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {

894 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,

897 return Builder.CreateCall(F, {Env});

898 }

899 case AMDGPU::BI__builtin_amdgcn_read_exec:

901 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:

903 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:

905 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:

906 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:

907 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:

908 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {

915

916

917

918 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,

919 {0, 1, 2});

920 RayDir =

921 Builder.CreateShuffleVector(RayDir, RayDir, {0, 1, 2});

922 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,

923 {0, 1, 2});

924

925 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,

926 {NodePtr->getType(), RayDir->getType()});

927 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,

928 RayInverseDir, TextureDescr});

929 }
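// bvh8 / bvh_dual intersect-ray builtins: the intrinsic returns an aggregate
// whose first element (the vdata result) is returned directly, while the
// updated ray origin and ray direction are stored through the two out-pointer
// arguments.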

930 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:

931 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray: {

932 Intrinsic::ID IID;

933 switch (BuiltinID) {

934 case AMDGPU::BI__builtin_amdgcn_image_bvh8_intersect_ray:

935 IID = Intrinsic::amdgcn_image_bvh8_intersect_ray;

936 break;

937 case AMDGPU::BI__builtin_amdgcn_image_bvh_dual_intersect_ray:

938 IID = Intrinsic::amdgcn_image_bvh_dual_intersect_ray;

939 break;

940 }

948

951

952 llvm::Function *IntrinsicFunc = CGM.getIntrinsic(IID);

953

954 llvm::CallInst *CI = Builder.CreateCall(

955 IntrinsicFunc, {NodePtr, RayExtent, InstanceMask, RayOrigin, RayDir,

956 Offset, TextureDescr});

957

958 llvm::Value *RetVData = Builder.CreateExtractValue(CI, 0);

959 llvm::Value *RetRayOrigin = Builder.CreateExtractValue(CI, 1);

960 llvm::Value *RetRayDir = Builder.CreateExtractValue(CI, 2);

961

962 Builder.CreateStore(RetRayOrigin, RetRayOriginPtr);

963 Builder.CreateStore(RetRayDir, RetRayDirPtr);

964

965 return RetVData;

966 }
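// ds_bvh_stack builtins: the intrinsic's two results (Rtn and A, extracted on
// elided lines) are packed into a two-element vector, zero-extending the
// second element when it is narrower than the vector's element type.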

967

968 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:

969 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:

970 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:

971 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn: {

972 Intrinsic::ID IID;

973 switch (BuiltinID) {

974 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn:

975 IID = Intrinsic::amdgcn_ds_bvh_stack_rtn;

976 break;

977 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn:

978 IID = Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn;

979 break;

980 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn:

981 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn;

982 break;

983 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn:

984 IID = Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn;

985 break;

986 }

987

989 for (int i = 0, e = E->getNumArgs(); i != e; ++i)

991

997 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,

998 (uint64_t)0);

999

1000

1001 if (A->getType()->getPrimitiveSizeInBits() <

1002 RetTy->getScalarType()->getPrimitiveSizeInBits())

1003 A = Builder.CreateZExt(A, RetTy->getScalarType());

1004

1005 return Builder.CreateInsertElement(I0, A, 1);

1006 }
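// Image load/store/sample builtins: every variant below funnels into the
// overloaded-image helper defined near the top of the file, passing the
// matching llvm.amdgcn.image.* intrinsic and a flag that marks the store
// forms.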

1007 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:

1008 case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:

1010 *this, E, Intrinsic::amdgcn_image_load_1d, false);

1011 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:

1012 case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:

1014 *this, E, Intrinsic::amdgcn_image_load_1darray, false);

1015 case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:

1016 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:

1017 case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:

1019 *this, E, Intrinsic::amdgcn_image_load_2d, false);

1020 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:

1021 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:

1022 case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:

1024 *this, E, Intrinsic::amdgcn_image_load_2darray, false);

1025 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:

1026 case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:

1028 *this, E, Intrinsic::amdgcn_image_load_3d, false);

1029 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:

1030 case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:

1032 *this, E, Intrinsic::amdgcn_image_load_cube, false);

1033 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:

1034 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:

1036 *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);

1037 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:

1038 case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:

1040 *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);

1041 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:

1042 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:

1043 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:

1045 *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);

1046 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:

1047 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:

1048 case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:

1050 *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);

1051 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:

1052 case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:

1054 *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);

1055 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:

1056 case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:

1058 *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);

1059 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:

1060 case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:

1062 *this, E, Intrinsic::amdgcn_image_store_1d, true);

1063 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:

1064 case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:

1066 *this, E, Intrinsic::amdgcn_image_store_1darray, true);

1067 case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:

1068 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:

1069 case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:

1071 *this, E, Intrinsic::amdgcn_image_store_2d, true);

1072 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:

1073 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:

1074 case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:

1076 *this, E, Intrinsic::amdgcn_image_store_2darray, true);

1077 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:

1078 case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:

1080 *this, E, Intrinsic::amdgcn_image_store_3d, true);

1081 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:

1082 case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:

1084 *this, E, Intrinsic::amdgcn_image_store_cube, true);

1085 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:

1086 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:

1088 *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);

1089 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:

1090 case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:

1092 *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);

1093 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:

1094 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:

1095 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:

1097 *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);

1098 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:

1099 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:

1100 case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:

1102 *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);

1103 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:

1104 case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:

1106 *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);

1107 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:

1108 case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:

1110 *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);

1111 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:

1112 case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:

1114 *this, E, Intrinsic::amdgcn_image_sample_1d, false);

1115 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:

1116 case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:

1118 *this, E, Intrinsic::amdgcn_image_sample_1darray, false);

1119 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:

1120 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:

1121 case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:

1123 *this, E, Intrinsic::amdgcn_image_sample_2d, false);

1124 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:

1125 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:

1126 case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:

1128 *this, E, Intrinsic::amdgcn_image_sample_2darray, false);

1129 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:

1130 case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:

1132 *this, E, Intrinsic::amdgcn_image_sample_3d, false);

1133 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:

1134 case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:

1136 *this, E, Intrinsic::amdgcn_image_sample_cube, false);

1137 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:

1138 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:

1140 *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);

1141 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:

1142 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:

1144 *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);

1145 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:

1146 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:

1148 *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);

1149 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:

1150 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:

1151 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:

1153 *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);

1154 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:

1155 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:

1156 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:

1158 *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);

1159 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:

1160 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:

1161 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:

1163 *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);

1164 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:

1165 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:

1167 *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);

1168 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:

1169 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:

1171 *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);

1172 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:

1173 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:

1175 *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);

1176 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:

1177 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:

1179 *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);

1180 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:

1181 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:

1183 *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);

1184 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:

1185 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:

1187 *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);

1188 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:

1189 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:

1191 *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);

1192 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:

1193 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:

1195 *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);

1196 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:

1197 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:

1198 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:

1200 *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);

1201 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:

1202 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:

1203 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:

1205 *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);

1206 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:

1207 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:

1208 case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:

1210 *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);

1211 case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:

1213 *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);

1214 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:

1215 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {

1216 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);

1218 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4

1219 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4

1220 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,

1221 {VT, VT});

1222

1224 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)

1226 return Builder.CreateCall(F, Args);

1227 }
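// WMMA / SWMMAC builtins: the long run of case labels below falls through to
// a single handler whose inner switch selects the target intrinsic and
// records, in ArgsForMatchingMatrixTypes, which argument indices determine
// the overloaded matrix types (with a few extra knobs such as
// AppendFalseForOpselArg and NeedReturnType).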

1228 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:

1229 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:

1230 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:

1231 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:

1232 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:

1233 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:

1234 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:

1235 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:

1236 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:

1237 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:

1238 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:

1239 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:

1240 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:

1241 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:

1242 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:

1243 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:

1244 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:

1245 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:

1246 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:

1247 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:

1248 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:

1249 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:

1250 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:

1251 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:

1252 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:

1253 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:

1254 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:

1255 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:

1256 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:

1257 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:

1258 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:

1259 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:

1260 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:

1261 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:

1262 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:

1263 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:

1264 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:

1265 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:

1266 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:

1267 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:

1268 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:

1269 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:

1270 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:

1271 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:

1272 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:

1273 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:

1274 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:

1275 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:

1276 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:

1277 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:

1278 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:

1279 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:

1280 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:

1281 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:

1282 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:

1283 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:

1284 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:

1285 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:

1286 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:

1287 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:

1288

1289 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:

1290 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:

1291 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:

1292 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:

1293 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:

1294 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:

1295 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:

1296 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:

1297 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:

1298 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:

1299 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:

1300 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:

1301 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:

1302 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:

1303 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:

1304 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:

1305 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:

1306 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:

1307 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:

1308 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:

1309 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:

1310 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:

1311 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:

1312 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:

1313 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:

1314 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:

1315 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:

1316 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:

1317 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:

1318 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:

1319 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:

1320 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:

1321 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:

1322 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:

1323 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:

1324 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:

1325 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:

1326 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:

1327 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:

1328 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:

1329 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:

1330 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:

1331 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {

1332

1333

1334

1335

1336

1337

1338

1339

1341

1342

1343

1344 bool AppendFalseForOpselArg = false;

1345 unsigned BuiltinWMMAOp;

1346

1347 bool NeedReturnType = false;

1348

1349 switch (BuiltinID) {

1350 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:

1351 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:

1352 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:

1353 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:

1354 ArgsForMatchingMatrixTypes = {2, 0};

1355 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;

1356 break;

1357 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:

1358 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:

1359 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:

1360 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:

1361 ArgsForMatchingMatrixTypes = {2, 0};

1362 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;

1363 break;

1364 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:

1365 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:

1366 AppendFalseForOpselArg = true;

1367 [[fallthrough]];

1368 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:

1369 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:

1370 ArgsForMatchingMatrixTypes = {2, 0};

1371 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;

1372 break;

1373 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:

1374 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:

1375 AppendFalseForOpselArg = true;

1376 [[fallthrough]];

1377 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:

1378 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:

1379 ArgsForMatchingMatrixTypes = {2, 0};

1380 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;

1381 break;

1382 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:

1383 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:

1384 ArgsForMatchingMatrixTypes = {2, 0};

1385 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;

1386 break;

1387 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:

1388 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:

1389 ArgsForMatchingMatrixTypes = {2, 0};

1390 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;

1391 break;

1392 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:

1393 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:

1394 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:

1395 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:

1396 ArgsForMatchingMatrixTypes = {4, 1};

1397 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;

1398 break;

1399 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:

1400 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:

1401 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:

1402 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:

1403 ArgsForMatchingMatrixTypes = {4, 1};

1404 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;

1405 break;

1406 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:

1407 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:

1408 ArgsForMatchingMatrixTypes = {2, 0};

1409 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;

1410 break;

1411 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:

1412 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:

1413 ArgsForMatchingMatrixTypes = {2, 0};

1414 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;

1415 break;

1416 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:

1417 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:

1418 ArgsForMatchingMatrixTypes = {2, 0};

1419 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;

1420 break;

1421 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:

1422 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:

1423 ArgsForMatchingMatrixTypes = {2, 0};

1424 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;

1425 break;

1426 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:

1427 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:

1428 ArgsForMatchingMatrixTypes = {4, 1};

1429 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;

1430 break;

1431 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:

1432 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:

1433 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1434 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;

1435 break;

1436 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:

1437 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:

1438 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1439 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;

1440 break;

1441 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:

1442 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:

1443 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1444 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;

1445 break;

1446 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:

1447 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:

1448 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1449 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;

1450 break;

1451 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:

1452 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:

1453 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1454 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;

1455 break;

1456 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:

1457 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:

1458 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1459 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;

1460 break;

1461 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:

1462 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:

1463 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1464 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;

1465 break;

1466 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:

1467 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:

1468 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1469 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;

1470 break;

1471 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:

1472 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:

1473 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1474 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;

1475 break;

1476 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:

1477 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:

1478 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1479 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;

1480 break;

1481 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:

1482 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:

1483 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1484 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;

1485 break;

1486

1487 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:

1488 ArgsForMatchingMatrixTypes = {5, 1};

1489 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;

1490 break;

1491 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:

1492 ArgsForMatchingMatrixTypes = {5, 1};

1493 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;

1494 break;

1495 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:

1496 ArgsForMatchingMatrixTypes = {5, 1};

1497 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;

1498 break;

1499 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:

1500 ArgsForMatchingMatrixTypes = {5, 1};

1501 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;

1502 break;

1503 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:

1504 ArgsForMatchingMatrixTypes = {5, 1};

1505 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;

1506 break;

1507 case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:

1508 NeedReturnType = true;

1509 ArgsForMatchingMatrixTypes = {1, 5};

1510 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;

1511 break;

1512 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:

1513 ArgsForMatchingMatrixTypes = {3, 0};

1514 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;

1515 break;

1516 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:

1517 ArgsForMatchingMatrixTypes = {3, 0};

1518 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;

1519 break;

1520 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:

1521 ArgsForMatchingMatrixTypes = {3, 0};

1522 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;

1523 break;

1524 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:

1525 ArgsForMatchingMatrixTypes = {3, 0};

1526 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;

1527 break;

1528 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:

1529 ArgsForMatchingMatrixTypes = {3, 0};

1530 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;

1531 break;

1532 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:

1533 ArgsForMatchingMatrixTypes = {3, 0};

1534 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;

1535 break;

1536 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:

1537 ArgsForMatchingMatrixTypes = {3, 0};

1538 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;

1539 break;

1540 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:

1541 ArgsForMatchingMatrixTypes = {3, 0};

1542 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;

1543 break;

1544 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:

1545 ArgsForMatchingMatrixTypes = {3, 0};

1546 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;

1547 break;

1548 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:

1549 ArgsForMatchingMatrixTypes = {3, 0};

1550 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;

1551 break;

1552 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:

1553 ArgsForMatchingMatrixTypes = {3, 0};

1554 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;

1555 break;

1556 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:

1557 ArgsForMatchingMatrixTypes = {3, 0};

1558 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;

1559 break;

1560 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:

1561 ArgsForMatchingMatrixTypes = {3, 0};

1562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;

1563 break;

1564 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:

1565 ArgsForMatchingMatrixTypes = {3, 0};

1566 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;

1567 break;

1568 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:

1569 ArgsForMatchingMatrixTypes = {3, 0};

1570 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;

1571 break;

1572 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:

1573 ArgsForMatchingMatrixTypes = {3, 0};

1574 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;

1575 break;

1576 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:

1577 ArgsForMatchingMatrixTypes = {4, 1};

1578 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;

1579 break;

1580 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:

1581 ArgsForMatchingMatrixTypes = {5, 1, 3};

1582 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;

1583 break;

1584 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4:

1585 ArgsForMatchingMatrixTypes = {5, 1, 3};

1586 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4;

1587 break;

1588 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:

1589 ArgsForMatchingMatrixTypes = {5, 1, 3};

1590 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4;

1591 break;

1592 case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:

1593 ArgsForMatchingMatrixTypes = {3, 0, 1};

1594 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;

1595 break;

1596 case AMDGPU::BI__builtin_amdgcn_wmma_scale_f32_32x16x128_f4:

1597 ArgsForMatchingMatrixTypes = {3, 0, 1};

1598 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4;

1599 break;

1600 case AMDGPU::BI__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4:

1601 ArgsForMatchingMatrixTypes = {3, 0, 1};

1602 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4;

1603 break;

1604 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:

1605 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1606 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;

1607 break;

1608 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:

1609 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1610 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;

1611 break;

1612 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:

1613 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1614 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;

1615 break;

1616 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:

1617 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1618 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;

1619 break;

1620 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:

1621 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1622 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;

1623 break;

1624 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:

1625 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1626 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;

1627 break;

1628 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:

1629 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1630 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;

1631 break;

1632 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:

1633 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1634 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;

1635 break;

1636 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:

1637 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1638 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;

1639 break;

1640 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:

1641 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1642 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;

1643 break;

1644 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:

1645 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1646 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;

1647 break;

1648 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:

1649 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1650 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;

1651 break;

1652 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:

1653 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};

1654 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;

1655 break;

1656 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:

1657 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};

1658 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;

1659 break;

1660 }

1661
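// Build the call: gather the scalar operands (plus a trailing 'false' opsel
// operand for the builtins that need it), then mangle the overloaded
// WMMA/SWMMAC intrinsic on the matrix operand types recorded above and, when
// NeedReturnType is set, on the return type as well.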

1662 SmallVector<Value *, 6> Args;

1663 for (int i = 0, e = E->getNumArgs(); i != e; ++i)

1664 Args.push_back(EmitScalarExpr(E->getArg(i)));

1665 if (AppendFalseForOpselArg)

1666 Args.push_back(Builder.getFalse());

1667

1668 SmallVector<llvm::Type *, 6> ArgTypes;

1669 if (NeedReturnType)

1670 ArgTypes.push_back(ConvertType(E->getType()));

1671 for (auto ArgIdx : ArgsForMatchingMatrixTypes)

1672 ArgTypes.push_back(Args[ArgIdx]->getType());

1673

1674 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);

1675 return Builder.CreateCall(F, Args);

1676 }

1677

1678 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:

1679 return EmitAMDGPUWorkGroupSize(*this, 0);

1680 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:

1681 return EmitAMDGPUWorkGroupSize(*this, 1);

1682 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:

1683 return EmitAMDGPUWorkGroupSize(*this, 2);

1684

1685
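// Dispatch grid size for the requested dimension (x, y, z).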

1686 case AMDGPU::BI__builtin_amdgcn_grid_size_x:

1687 return EmitAMDGPUGridSize(*this, 0);

1688 case AMDGPU::BI__builtin_amdgcn_grid_size_y:

1689 return EmitAMDGPUGridSize(*this, 1);

1690 case AMDGPU::BI__builtin_amdgcn_grid_size_z:

1691 return EmitAMDGPUGridSize(*this, 2);

1692

1693

1694 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:

1695 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:

1696 return emitBuiltinWithOneOverloadedType<1>(*this, E,

1697 Intrinsic::r600_recipsqrt_ieee);

1698 case AMDGPU::BI__builtin_amdgcn_alignbit: {

1699 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));

1700 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));

1701 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

1702 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());

1703 return Builder.CreateCall(F, { Src0, Src1, Src2 });

1704 }

1705 case AMDGPU::BI__builtin_amdgcn_fence: {

1706 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),

1707 EmitScalarExpr(E->getArg(1)), AO, SSID);

1708 FenceInst *Fence = Builder.CreateFence(AO, SSID);

1709 if (E->getNumArgs() > 2)

1710 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);

1711 return Fence;

1712 }
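// The builtins below are all lowered to a single atomicrmw instruction; the
// inner switch only selects the binary operation.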

1713 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:

1714 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:

1715 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:

1716 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:

1717 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:

1718 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:

1719 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:

1720 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:

1721 case AMDGPU::BI__builtin_amdgcn_ds_faddf:

1722 case AMDGPU::BI__builtin_amdgcn_ds_fminf:

1723 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:

1724 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:

1725 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:

1726 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:

1727 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:

1728 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:

1729 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:

1730 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:

1731 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:

1732 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:

1733 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:

1734 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:

1735 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {

1736 llvm::AtomicRMWInst::BinOp BinOp;

1737 switch (BuiltinID) {

1738 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:

1739 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:

1740 BinOp = llvm::AtomicRMWInst::UIncWrap;

1741 break;

1742 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:

1743 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:

1744 BinOp = llvm::AtomicRMWInst::UDecWrap;

1745 break;

1746 case AMDGPU::BI__builtin_amdgcn_ds_faddf:

1747 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:

1748 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:

1749 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:

1750 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:

1751 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:

1752 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:

1753 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:

1754 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:

1755 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:

1756 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:

1757 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:

1758 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:

1759 BinOp = llvm::AtomicRMWInst::FAdd;

1760 break;

1761 case AMDGPU::BI__builtin_amdgcn_ds_fminf:

1762 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:

1763 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:

1764 BinOp = llvm::AtomicRMWInst::FMin;

1765 break;

1766 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:

1767 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:

1768 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:

1769 BinOp = llvm::AtomicRMWInst::FMax;

1770 break;

1771 }

1772
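// Gather the pointer and value operands. Volatility comes either from an
// explicit argument (the ds_f*f builtins) or from the pointee type.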

1773 Address Ptr = CheckAtomicAlignment(*this, E);

1774 Value *Val = EmitScalarExpr(E->getArg(1));

1775 llvm::Type *OrigTy = Val->getType();

1776 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();

1777

1778 bool Volatile;

1779

1780 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||

1781 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||

1782 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {

1783 // The ds_faddf/ds_fminf/ds_fmaxf builtins carry an explicit volatile operand.

1784 Volatile =

1785 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();

1786 } else {

1787 // Otherwise infer volatility from the pointee type.

1788 Volatile =

1789 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

1790 }

1791

1792 if (E->getNumArgs() >= 4) {

1793 // Some of these builtins carry explicit ordering and scope arguments.

1794 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),

1795 EmitScalarExpr(E->getArg(3)), AO, SSID);

1796 } else {

1797 // Most of these builtins do not have syncscope/order arguments. For the DS

1798 // atomics the scope does not really matter, since they implicitly operate

1799 // at workgroup scope.

1800 //

1801 // The global/flat cases need agent scope so the backend consistently

1802 // selects the native instruction instead of a cmpxchg expansion.

1803 if (getTarget().getTriple().isSPIRV())

1804 SSID = getLLVMContext().getOrInsertSyncScopeID("device");

1805 else

1806 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");

1807 AO = AtomicOrdering::Monotonic;

1808

1809 // The v2bf16 builtins use <2 x i16>; bitcast to the <2 x bfloat> type the atomicrmw expects.

1810 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||

1811 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||

1812 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {

1813 llvm::Type *V2BF16Ty = FixedVectorType::get(

1814 llvm::Type::getBFloatTy(Builder.getContext()), 2);

1815 Val = Builder.CreateBitCast(Val, V2BF16Ty);

1816 }

1817 }

1818

1819 llvm::AtomicRMWInst *RMW =

1820 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);

1821 if (Volatile)

1822 RMW->setVolatile(true);

1823

1824 unsigned AddrSpace = Ptr.getType()->getAddressSpace();

1825 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {

1826 // Assert that the memory operated on is not fine-grained (migratable), so

1827 // the backend can select the native atomic instead of a CAS expansion.

1828 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});

1829 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);

1830 // For f32 fadd, additionally allow the denormal mode to be ignored; some

1831 // targets require this to select the native global/flat atomic fadd

1832 // instruction.

1833 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())

1834 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);

1835 }

1836

1837 return Builder.CreateBitCast(RMW, OrigTy);

1838 }

1839 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:

1840 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {

1841 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));

1842 llvm::Type *ResultType = ConvertType(E->getType());

1843 // The intrinsic is overloaded on the return type only.

1844 Function *F =

1845 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});

1846 return Builder.CreateCall(F, {Arg});

1847 }

1848 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:

1849 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {

1850 // The intrinsic returns a pair of i32 values; marshal them into the

1851 // <2 x i32> vector result the builtin is declared to return.

1852 Value *VDstOld = EmitScalarExpr(E->getArg(0));

1853 Value *VSrcOld = EmitScalarExpr(E->getArg(1));

1854 Value *FI = EmitScalarExpr(E->getArg(2));

1855 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));

1856 Function *F =

1857 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap

1858 ? Intrinsic::amdgcn_permlane16_swap

1859 : Intrinsic::amdgcn_permlane32_swap);

1860 llvm::CallInst *Call =

1861 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});

1862

1863 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);

1864 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);

1865

1866 llvm::Type *ResultType = ConvertType(E->getType());

1867

1868 llvm::Value *Insert0 = Builder.CreateInsertElement(

1869 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));

1870 llvm::Value *AsVector =

1871 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));

1872 return AsVector;

1873 }

1874 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:

1875 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:

1876 return emitBuiltinWithOneOverloadedType<4>(*this, E,

1877 Intrinsic::amdgcn_bitop3);

1878 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: {

1879

1880

1881
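// Collect the four operands and build a ptr addrspace(8) buffer resource; the
// intrinsic is overloaded on the result type and the base-pointer type.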

1882 SmallVector<Value *, 4> Args;

1883 for (unsigned I = 0; I < 4; ++I)

1884 Args.push_back(EmitScalarExpr(E->getArg(I)));

1885 llvm::PointerType *RetTy = llvm::PointerType::get(

1886 Builder.getContext(), llvm::AMDGPUAS::BUFFER_RESOURCE);

1887 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_make_buffer_rsrc,

1888 {RetTy, Args[0]->getType()});

1889 return Builder.CreateCall(F, Args);

1890 }
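// The raw buffer load/store builtins are lowered to the pointer-based
// (ptr addrspace(8)) buffer intrinsics.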

1891 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:

1892 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:

1893 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:

1894 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:

1895 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:

1896 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:

1897 return emitBuiltinWithOneOverloadedType<5>(

1898 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);

1899 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:

1900 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:

1901 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:

1902 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:

1903 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:

1904 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
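// Pick the natural IR return type for the requested load width; the wider
// loads return <N x i32> vectors.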

1905 llvm::Type *RetTy = nullptr;

1906 switch (BuiltinID) {

1907 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:

1908 RetTy = Int8Ty;

1909 break;

1910 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:

1911 RetTy = Int16Ty;

1912 break;

1913 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:

1914 RetTy = Int32Ty;

1915 break;

1916 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:

1917 RetTy = llvm::FixedVectorType::get(Int32Ty, 2);

1918 break;

1919 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:

1920 RetTy = llvm::FixedVectorType::get(Int32Ty, 3);

1921 break;

1922 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:

1923 RetTy = llvm::FixedVectorType::get(Int32Ty, 4);

1924 break;

1925 }

1926 Function *F =

1927 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);

1928 return Builder.CreateCall(

1929 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),

1930 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});

1931 }

1932 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:

1933 return emitBuiltinWithOneOverloadedType<5>(

1934 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);

1935 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:

1936 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:

1937 return emitBuiltinWithOneOverloadedType<5>(

1938 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);

1939 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32:

1940 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64:

1941 return emitBuiltinWithOneOverloadedType<5>(

1942 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin);

1943 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:

1944 case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:

1945 return emitBuiltinWithOneOverloadedType<5>(

1946 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);

1947 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:

1948 return emitBuiltinWithOneOverloadedType<2>(

1949 *this, E, Intrinsic::amdgcn_s_prefetch_data);

1950 case Builtin::BIlogbf:

1951 case Builtin::BI__builtin_logbf: {
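// logb(x) is open-coded via llvm.frexp: the extracted exponent minus one is
// the result for finite non-zero values; +/-infinity maps to +infinity (via
// |x|) and zero maps to -infinity.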

1952 Value *Src0 = EmitScalarExpr(E->getArg(0));

1953 Function *FrExpFunc = CGM.getIntrinsic(

1954 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});

1955 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);

1956 Value *Exp = Builder.CreateExtractValue(FrExp, 1);

1957 Value *Add = Builder.CreateAdd(

1958 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);

1959 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());

1960 Value *Fabs =

1961 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);

1962 Value *FCmpONE = Builder.CreateFCmpONE(

1963 Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));

1964 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);

1965 Value *FCmpOEQ =

1966 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));

1967 Value *Sel2 = Builder.CreateSelect(

1968 FCmpOEQ,

1969 ConstantFP::getInfinity(Builder.getFloatTy(), true), Sel1);

1970 return Sel2;

1971 }
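// Same lowering as logbf above, in double precision.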

1972 case Builtin::BIlogb:

1973 case Builtin::BI__builtin_logb: {

1974 Value *Src0 = EmitScalarExpr(E->getArg(0));

1975 Function *FrExpFunc = CGM.getIntrinsic(

1976 Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});

1977 CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);

1978 Value *Exp = Builder.CreateExtractValue(FrExp, 1);

1979 Value *Add = Builder.CreateAdd(

1980 Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);

1981 Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());

1982 Value *Fabs =

1983 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);

1984 Value *FCmpONE = Builder.CreateFCmpONE(

1985 Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));

1986 Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);

1987 Value *FCmpOEQ =

1988 Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));

1989 Value *Sel2 = Builder.CreateSelect(

1990 FCmpOEQ,

1991 ConstantFP::getInfinity(Builder.getDoubleTy(), true),

1992 Sel1);

1993 return Sel2;

1994 }
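// scalbn/scalbnf map directly onto llvm.ldexp (or its constrained variant
// under strict floating-point semantics).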

1995 case Builtin::BIscalbnf:

1996 case Builtin::BI__builtin_scalbnf:

1997 case Builtin::BIscalbn:

1998 case Builtin::BI__builtin_scalbn:

1999 return emitBinaryExpMaybeConstrainedFPBuiltin(

2000 *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);

2001 default:

2002 return nullptr;

2003 }

2004}