clang: lib/CodeGen/TargetBuiltins/NVPTX.cpp Source File (original) (raw)

// MMA_INTR(geom_op_type, layout): token-pastes both arguments into the
// identifier Intrinsic::nvvm_wmma_<geom_op_type>_<layout>_stride.
// NOTE(review): the leading "30"/"31" on the lines below look like line
// numbers carried over from a rendered listing - confirm before compiling.
30#define MMA_INTR(geom_op_type, layout) \

31 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride

// MMA_LDST(n, geom_op_type): expands to the brace-initializer
//   { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
// i.e. the value n followed by the `col` and `row` variants of the same
// geom_op_type intrinsic name.
// NOTE(review): presumably initializes an NVPTXMmaLdstInfo, whose NumResults,
// IID_col and IID_row members are read by the ld/st cases later in this file -
// confirm the field order against the struct definition.
32#define MMA_LDST(n, geom_op_type) \

33 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }

422 switch (BuiltinID) {

423 case NVPTX::BI__nvvm_atom_add_gen_i:

424 case NVPTX::BI__nvvm_atom_add_gen_l:

425 case NVPTX::BI__nvvm_atom_add_gen_ll:

427

428 case NVPTX::BI__nvvm_atom_sub_gen_i:

429 case NVPTX::BI__nvvm_atom_sub_gen_l:

430 case NVPTX::BI__nvvm_atom_sub_gen_ll:

432

433 case NVPTX::BI__nvvm_atom_and_gen_i:

434 case NVPTX::BI__nvvm_atom_and_gen_l:

435 case NVPTX::BI__nvvm_atom_and_gen_ll:

437

438 case NVPTX::BI__nvvm_atom_or_gen_i:

439 case NVPTX::BI__nvvm_atom_or_gen_l:

440 case NVPTX::BI__nvvm_atom_or_gen_ll:

442

443 case NVPTX::BI__nvvm_atom_xor_gen_i:

444 case NVPTX::BI__nvvm_atom_xor_gen_l:

445 case NVPTX::BI__nvvm_atom_xor_gen_ll:

447

448 case NVPTX::BI__nvvm_atom_xchg_gen_i:

449 case NVPTX::BI__nvvm_atom_xchg_gen_l:

450 case NVPTX::BI__nvvm_atom_xchg_gen_ll:

452

453 case NVPTX::BI__nvvm_atom_max_gen_i:

454 case NVPTX::BI__nvvm_atom_max_gen_l:

455 case NVPTX::BI__nvvm_atom_max_gen_ll:

457

458 case NVPTX::BI__nvvm_atom_max_gen_ui:

459 case NVPTX::BI__nvvm_atom_max_gen_ul:

460 case NVPTX::BI__nvvm_atom_max_gen_ull:

462

463 case NVPTX::BI__nvvm_atom_min_gen_i:

464 case NVPTX::BI__nvvm_atom_min_gen_l:

465 case NVPTX::BI__nvvm_atom_min_gen_ll:

467

468 case NVPTX::BI__nvvm_atom_min_gen_ui:

469 case NVPTX::BI__nvvm_atom_min_gen_ul:

470 case NVPTX::BI__nvvm_atom_min_gen_ull:

472

473 case NVPTX::BI__nvvm_atom_cas_gen_us:

474 case NVPTX::BI__nvvm_atom_cas_gen_i:

475 case NVPTX::BI__nvvm_atom_cas_gen_l:

476 case NVPTX::BI__nvvm_atom_cas_gen_ll:

477

478

480

481 case NVPTX::BI__nvvm_atom_add_gen_f:

482 case NVPTX::BI__nvvm_atom_add_gen_d: {

485

486 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,

487 AtomicOrdering::SequentiallyConsistent);

488 }

489

490 case NVPTX::BI__nvvm_atom_inc_gen_ui:

492

493 case NVPTX::BI__nvvm_atom_dec_gen_ui:

495

496 case NVPTX::BI__nvvm_ldg_c:

497 case NVPTX::BI__nvvm_ldg_sc:

498 case NVPTX::BI__nvvm_ldg_c2:

499 case NVPTX::BI__nvvm_ldg_sc2:

500 case NVPTX::BI__nvvm_ldg_c4:

501 case NVPTX::BI__nvvm_ldg_sc4:

502 case NVPTX::BI__nvvm_ldg_s:

503 case NVPTX::BI__nvvm_ldg_s2:

504 case NVPTX::BI__nvvm_ldg_s4:

505 case NVPTX::BI__nvvm_ldg_i:

506 case NVPTX::BI__nvvm_ldg_i2:

507 case NVPTX::BI__nvvm_ldg_i4:

508 case NVPTX::BI__nvvm_ldg_l:

509 case NVPTX::BI__nvvm_ldg_l2:

510 case NVPTX::BI__nvvm_ldg_ll:

511 case NVPTX::BI__nvvm_ldg_ll2:

512 case NVPTX::BI__nvvm_ldg_uc:

513 case NVPTX::BI__nvvm_ldg_uc2:

514 case NVPTX::BI__nvvm_ldg_uc4:

515 case NVPTX::BI__nvvm_ldg_us:

516 case NVPTX::BI__nvvm_ldg_us2:

517 case NVPTX::BI__nvvm_ldg_us4:

518 case NVPTX::BI__nvvm_ldg_ui:

519 case NVPTX::BI__nvvm_ldg_ui2:

520 case NVPTX::BI__nvvm_ldg_ui4:

521 case NVPTX::BI__nvvm_ldg_ul:

522 case NVPTX::BI__nvvm_ldg_ul2:

523 case NVPTX::BI__nvvm_ldg_ull:

524 case NVPTX::BI__nvvm_ldg_ull2:

525 case NVPTX::BI__nvvm_ldg_f:

526 case NVPTX::BI__nvvm_ldg_f2:

527 case NVPTX::BI__nvvm_ldg_f4:

528 case NVPTX::BI__nvvm_ldg_d:

529 case NVPTX::BI__nvvm_ldg_d2:

530

531

532

533 return MakeLdg(*this, E);

534

535 case NVPTX::BI__nvvm_ldu_c:

536 case NVPTX::BI__nvvm_ldu_sc:

537 case NVPTX::BI__nvvm_ldu_c2:

538 case NVPTX::BI__nvvm_ldu_sc2:

539 case NVPTX::BI__nvvm_ldu_c4:

540 case NVPTX::BI__nvvm_ldu_sc4:

541 case NVPTX::BI__nvvm_ldu_s:

542 case NVPTX::BI__nvvm_ldu_s2:

543 case NVPTX::BI__nvvm_ldu_s4:

544 case NVPTX::BI__nvvm_ldu_i:

545 case NVPTX::BI__nvvm_ldu_i2:

546 case NVPTX::BI__nvvm_ldu_i4:

547 case NVPTX::BI__nvvm_ldu_l:

548 case NVPTX::BI__nvvm_ldu_l2:

549 case NVPTX::BI__nvvm_ldu_ll:

550 case NVPTX::BI__nvvm_ldu_ll2:

551 case NVPTX::BI__nvvm_ldu_uc:

552 case NVPTX::BI__nvvm_ldu_uc2:

553 case NVPTX::BI__nvvm_ldu_uc4:

554 case NVPTX::BI__nvvm_ldu_us:

555 case NVPTX::BI__nvvm_ldu_us2:

556 case NVPTX::BI__nvvm_ldu_us4:

557 case NVPTX::BI__nvvm_ldu_ui:

558 case NVPTX::BI__nvvm_ldu_ui2:

559 case NVPTX::BI__nvvm_ldu_ui4:

560 case NVPTX::BI__nvvm_ldu_ul:

561 case NVPTX::BI__nvvm_ldu_ul2:

562 case NVPTX::BI__nvvm_ldu_ull:

563 case NVPTX::BI__nvvm_ldu_ull2:

564 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);

565 case NVPTX::BI__nvvm_ldu_f:

566 case NVPTX::BI__nvvm_ldu_f2:

567 case NVPTX::BI__nvvm_ldu_f4:

568 case NVPTX::BI__nvvm_ldu_d:

569 case NVPTX::BI__nvvm_ldu_d2:

570 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);

571

572 case NVPTX::BI__nvvm_atom_cta_add_gen_i:

573 case NVPTX::BI__nvvm_atom_cta_add_gen_l:

574 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:

575 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);

576 case NVPTX::BI__nvvm_atom_sys_add_gen_i:

577 case NVPTX::BI__nvvm_atom_sys_add_gen_l:

578 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:

579 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);

580 case NVPTX::BI__nvvm_atom_cta_add_gen_f:

581 case NVPTX::BI__nvvm_atom_cta_add_gen_d:

582 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);

583 case NVPTX::BI__nvvm_atom_sys_add_gen_f:

584 case NVPTX::BI__nvvm_atom_sys_add_gen_d:

585 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);

586 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:

587 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:

588 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:

589 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);

590 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:

591 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:

592 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:

593 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);

594 case NVPTX::BI__nvvm_atom_cta_max_gen_i:

595 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:

596 case NVPTX::BI__nvvm_atom_cta_max_gen_l:

597 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:

598 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:

599 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:

600 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);

601 case NVPTX::BI__nvvm_atom_sys_max_gen_i:

602 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:

603 case NVPTX::BI__nvvm_atom_sys_max_gen_l:

604 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:

605 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:

606 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:

607 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);

608 case NVPTX::BI__nvvm_atom_cta_min_gen_i:

609 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:

610 case NVPTX::BI__nvvm_atom_cta_min_gen_l:

611 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:

612 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:

613 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:

614 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);

615 case NVPTX::BI__nvvm_atom_sys_min_gen_i:

616 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:

617 case NVPTX::BI__nvvm_atom_sys_min_gen_l:

618 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:

619 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:

620 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:

621 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);

622 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:

623 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);

624 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:

625 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);

626 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:

627 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);

628 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:

629 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);

630 case NVPTX::BI__nvvm_atom_cta_and_gen_i:

631 case NVPTX::BI__nvvm_atom_cta_and_gen_l:

632 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:

633 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);

634 case NVPTX::BI__nvvm_atom_sys_and_gen_i:

635 case NVPTX::BI__nvvm_atom_sys_and_gen_l:

636 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:

637 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);

638 case NVPTX::BI__nvvm_atom_cta_or_gen_i:

639 case NVPTX::BI__nvvm_atom_cta_or_gen_l:

640 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:

641 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);

642 case NVPTX::BI__nvvm_atom_sys_or_gen_i:

643 case NVPTX::BI__nvvm_atom_sys_or_gen_l:

644 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:

645 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);

646 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:

647 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:

648 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:

649 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);

650 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:

651 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:

652 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:

653 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);

654 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:

655 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:

656 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:

657 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {

659 llvm::Type *ElemTy =

661 return Builder.CreateCall(

662 CGM.getIntrinsic(

663 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),

664 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});

665 }

666 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:

667 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:

668 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:

669 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {

671 llvm::Type *ElemTy =

673 return Builder.CreateCall(

674 CGM.getIntrinsic(

675 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),

676 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});

677 }

678 case NVPTX::BI__nvvm_match_all_sync_i32p:

679 case NVPTX::BI__nvvm_match_all_sync_i64p: {

684 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p

685 ? Intrinsic::nvvm_match_all_sync_i32p

686 : Intrinsic::nvvm_match_all_sync_i64p),

687 {Mask, Val});

688 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),

690 Builder.CreateStore(Pred, PredOutPtr);

691 return Builder.CreateExtractValue(ResultPair, 0);

692 }

693

694

695 case NVPTX::BI__hmma_m16n16k16_ld_a:

696 case NVPTX::BI__hmma_m16n16k16_ld_b:

697 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:

698 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:

699 case NVPTX::BI__hmma_m32n8k16_ld_a:

700 case NVPTX::BI__hmma_m32n8k16_ld_b:

701 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:

702 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:

703 case NVPTX::BI__hmma_m8n32k16_ld_a:

704 case NVPTX::BI__hmma_m8n32k16_ld_b:

705 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:

706 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:

707

708 case NVPTX::BI__imma_m16n16k16_ld_a_s8:

709 case NVPTX::BI__imma_m16n16k16_ld_a_u8:

710 case NVPTX::BI__imma_m16n16k16_ld_b_s8:

711 case NVPTX::BI__imma_m16n16k16_ld_b_u8:

712 case NVPTX::BI__imma_m16n16k16_ld_c:

713 case NVPTX::BI__imma_m32n8k16_ld_a_s8:

714 case NVPTX::BI__imma_m32n8k16_ld_a_u8:

715 case NVPTX::BI__imma_m32n8k16_ld_b_s8:

716 case NVPTX::BI__imma_m32n8k16_ld_b_u8:

717 case NVPTX::BI__imma_m32n8k16_ld_c:

718 case NVPTX::BI__imma_m8n32k16_ld_a_s8:

719 case NVPTX::BI__imma_m8n32k16_ld_a_u8:

720 case NVPTX::BI__imma_m8n32k16_ld_b_s8:

721 case NVPTX::BI__imma_m8n32k16_ld_b_u8:

722 case NVPTX::BI__imma_m8n32k16_ld_c:

723

724 case NVPTX::BI__imma_m8n8k32_ld_a_s4:

725 case NVPTX::BI__imma_m8n8k32_ld_a_u4:

726 case NVPTX::BI__imma_m8n8k32_ld_b_s4:

727 case NVPTX::BI__imma_m8n8k32_ld_b_u4:

728 case NVPTX::BI__imma_m8n8k32_ld_c:

729 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:

730 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:

731 case NVPTX::BI__bmma_m8n8k128_ld_c:

732

733 case NVPTX::BI__dmma_m8n8k4_ld_a:

734 case NVPTX::BI__dmma_m8n8k4_ld_b:

735 case NVPTX::BI__dmma_m8n8k4_ld_c:

736

737 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:

738 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:

739 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:

740 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:

741 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:

742 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:

743 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:

744 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:

745 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {

749 std::optionalllvm::APSInt isColMajorArg =

751 if (!isColMajorArg)

752 return nullptr;

753 bool isColMajor = isColMajorArg->getSExtValue();

754 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);

755 unsigned IID = isColMajor ? II.IID_col : II.IID_row;

756 if (IID == 0)

757 return nullptr;

758

760 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});

761

762

763 assert(II.NumResults);

764 if (II.NumResults == 1) {

767 } else {

768 for (unsigned i = 0; i < II.NumResults; ++i) {

769 Builder.CreateAlignedStore(

773 llvm::ConstantInt::get(IntTy, i)),

775 }

776 }

778 }

779

780 case NVPTX::BI__hmma_m16n16k16_st_c_f16:

781 case NVPTX::BI__hmma_m16n16k16_st_c_f32:

782 case NVPTX::BI__hmma_m32n8k16_st_c_f16:

783 case NVPTX::BI__hmma_m32n8k16_st_c_f32:

784 case NVPTX::BI__hmma_m8n32k16_st_c_f16:

785 case NVPTX::BI__hmma_m8n32k16_st_c_f32:

786 case NVPTX::BI__imma_m16n16k16_st_c_i32:

787 case NVPTX::BI__imma_m32n8k16_st_c_i32:

788 case NVPTX::BI__imma_m8n32k16_st_c_i32:

789 case NVPTX::BI__imma_m8n8k32_st_c_i32:

790 case NVPTX::BI__bmma_m8n8k128_st_c_i32:

791 case NVPTX::BI__dmma_m8n8k4_st_c_f64:

792 case NVPTX::BI__mma_m16n16k8_st_c_f32: {

796 std::optionalllvm::APSInt isColMajorArg =

798 if (!isColMajorArg)

799 return nullptr;

800 bool isColMajor = isColMajorArg->getSExtValue();

801 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);

802 unsigned IID = isColMajor ? II.IID_col : II.IID_row;

803 if (IID == 0)

804 return nullptr;

806 CGM.getIntrinsic(IID, Dst->getType());

807 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);

809 for (unsigned i = 0; i < II.NumResults; ++i) {

813 llvm::ConstantInt::get(IntTy, i)),

815 Values.push_back(Builder.CreateBitCast(V, ParamType));

816 }

817 Values.push_back(Ldm);

820 }

821

822

823

824 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:

825 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:

826 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:

827 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:

828 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:

829 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:

830 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:

831 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:

832 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:

833 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:

834 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:

835 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:

836 case NVPTX::BI__imma_m16n16k16_mma_s8:

837 case NVPTX::BI__imma_m16n16k16_mma_u8:

838 case NVPTX::BI__imma_m32n8k16_mma_s8:

839 case NVPTX::BI__imma_m32n8k16_mma_u8:

840 case NVPTX::BI__imma_m8n32k16_mma_s8:

841 case NVPTX::BI__imma_m8n32k16_mma_u8:

842 case NVPTX::BI__imma_m8n8k32_mma_s4:

843 case NVPTX::BI__imma_m8n8k32_mma_u4:

844 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:

845 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:

846 case NVPTX::BI__dmma_m8n8k4_mma_f64:

847 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:

848 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:

849 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:

850 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {

855 std::optionalllvm::APSInt LayoutArg =

857 if (!LayoutArg)

858 return nullptr;

859 int Layout = LayoutArg->getSExtValue();

860 if (Layout < 0 || Layout > 3)

861 return nullptr;

862 llvm::APSInt SatfArg;

863 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||

864 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)

865 SatfArg = 0;

866 else if (std::optionalllvm::APSInt OptSatfArg =

868 SatfArg = *OptSatfArg;

869 else

870 return nullptr;

871 bool Satf = SatfArg.getSExtValue();

872 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);

873 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);

874 if (IID == 0)

875 return nullptr;

876

878 Function *Intrinsic = CGM.getIntrinsic(IID);

879 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);

880

881 for (unsigned i = 0; i < MI.NumEltsA; ++i) {

885 llvm::ConstantInt::get(IntTy, i)),

887 Values.push_back(Builder.CreateBitCast(V, AType));

888 }

889

890 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);

891 for (unsigned i = 0; i < MI.NumEltsB; ++i) {

895 llvm::ConstantInt::get(IntTy, i)),

897 Values.push_back(Builder.CreateBitCast(V, BType));

898 }

899

900 llvm::Type *CType =

901 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);

902 for (unsigned i = 0; i < MI.NumEltsC; ++i) {

906 llvm::ConstantInt::get(IntTy, i)),

908 Values.push_back(Builder.CreateBitCast(V, CType));

909 }

912 for (unsigned i = 0; i < MI.NumEltsD; ++i)

913 Builder.CreateAlignedStore(

916 llvm::ConstantInt::get(IntTy, i)),

919 }

920

921 case NVPTX::BI__nvvm_ex2_approx_f16:

922 return MakeHalfType(

923 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx, Builder.getHalfTy()),

924 BuiltinID, E, *this);

925 case NVPTX::BI__nvvm_ex2_approx_f16x2:

926 return MakeHalfType(

927 CGM.getIntrinsic(Intrinsic::nvvm_ex2_approx,

928 FixedVectorType::get(Builder.getHalfTy(), 2)),

929 BuiltinID, E, *this);

930 case NVPTX::BI__nvvm_ff2f16x2_rn:

931 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);

932 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:

933 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);

934 case NVPTX::BI__nvvm_ff2f16x2_rz:

935 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);

936 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:

937 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);

938 case NVPTX::BI__nvvm_fma_rn_f16:

939 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);

940 case NVPTX::BI__nvvm_fma_rn_f16x2:

941 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);

942 case NVPTX::BI__nvvm_fma_rn_ftz_f16:

943 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);

944 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:

945 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);

946 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:

947 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,

948 *this);

949 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:

950 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,

951 *this);

952 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:

953 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,

954 *this);

955 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:

956 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,

957 *this);

958 case NVPTX::BI__nvvm_fma_rn_relu_f16:

959 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);

960 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:

961 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);

962 case NVPTX::BI__nvvm_fma_rn_sat_f16:

963 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);

964 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:

965 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);

966 case NVPTX::BI__nvvm_fmax_f16:

967 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);

968 case NVPTX::BI__nvvm_fmax_f16x2:

969 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);

970 case NVPTX::BI__nvvm_fmax_ftz_f16:

971 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);

972 case NVPTX::BI__nvvm_fmax_ftz_f16x2:

973 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);

974 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:

975 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);

976 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:

977 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,

978 *this);

979 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:

980 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,

981 E, *this);

982 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:

983 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,

984 BuiltinID, E, *this);

985 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:

986 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,

987 *this);

988 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:

989 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,

990 E, *this);

991 case NVPTX::BI__nvvm_fmax_nan_f16:

992 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);

993 case NVPTX::BI__nvvm_fmax_nan_f16x2:

994 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);

995 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:

996 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,

997 *this);

998 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:

999 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,

1000 E, *this);

1001 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:

1002 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,

1003 *this);

1004 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:

1005 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,

1006 *this);

1007 case NVPTX::BI__nvvm_fmin_f16:

1008 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);

1009 case NVPTX::BI__nvvm_fmin_f16x2:

1010 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);

1011 case NVPTX::BI__nvvm_fmin_ftz_f16:

1012 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);

1013 case NVPTX::BI__nvvm_fmin_ftz_f16x2:

1014 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);

1015 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:

1016 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);

1017 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:

1018 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,

1019 *this);

1020 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:

1021 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,

1022 E, *this);

1023 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:

1024 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,

1025 BuiltinID, E, *this);

1026 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:

1027 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,

1028 *this);

1029 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:

1030 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,

1031 E, *this);

1032 case NVPTX::BI__nvvm_fmin_nan_f16:

1033 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);

1034 case NVPTX::BI__nvvm_fmin_nan_f16x2:

1035 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);

1036 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:

1037 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,

1038 *this);

1039 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:

1040 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,

1041 E, *this);

1042 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:

1043 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,

1044 *this);

1045 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:

1046 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,

1047 *this);

1048 case NVPTX::BI__nvvm_fabs_f:

1049 case NVPTX::BI__nvvm_abs_bf16:

1050 case NVPTX::BI__nvvm_abs_bf16x2:

1051 case NVPTX::BI__nvvm_fabs_f16:

1052 case NVPTX::BI__nvvm_fabs_f16x2:

1053 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,

1055 case NVPTX::BI__nvvm_fabs_ftz_f:

1056 case NVPTX::BI__nvvm_fabs_ftz_f16:

1057 case NVPTX::BI__nvvm_fabs_ftz_f16x2:

1058 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,

1060 case NVPTX::BI__nvvm_fabs_d:

1061 return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,

1063 case NVPTX::BI__nvvm_ex2_approx_d:

1064 case NVPTX::BI__nvvm_ex2_approx_f:

1065 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx,

1067 case NVPTX::BI__nvvm_ex2_approx_ftz_f:

1068 return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_ex2_approx_ftz,

1070 case NVPTX::BI__nvvm_ldg_h:

1071 case NVPTX::BI__nvvm_ldg_h2:

1072 return EnsureNativeHalfSupport(BuiltinID, E, *this) ? MakeLdg(*this, E)

1073 : nullptr;

1074 case NVPTX::BI__nvvm_ldu_h:

1075 case NVPTX::BI__nvvm_ldu_h2:

1076 return EnsureNativeHalfSupport(BuiltinID, E, *this)

1077 ? MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E)

1078 : nullptr;

1079 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:

1080 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,

1081 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,

1082 4);

1083 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:

1084 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,

1085 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,

1086 8);

1087 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:

1088 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,

1089 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,

1090 16);

1091 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:

1092 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,

1093 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,

1094 16);

1095 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:

1096 return Builder.CreateCall(

1097 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));

1098 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:

1099 return Builder.CreateCall(

1100 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));

1101 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:

1102 return Builder.CreateCall(

1103 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));

1104 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:

1105 return Builder.CreateCall(

1106 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));

1107 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:

1108 return Builder.CreateCall(

1109 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));

1110 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:

1111 return Builder.CreateCall(

1112 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));

1113 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:

1114 return Builder.CreateCall(

1115 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));

1116 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:

1117 return Builder.CreateCall(

1118 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));

1119 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:

1120 return Builder.CreateCall(

1121 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));

1122 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:

1123 return Builder.CreateCall(

1124 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));

1125 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:

1126 return Builder.CreateCall(

1127 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));

1128 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:

1129 return Builder.CreateCall(

1130 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));

1131 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:

1132 return Builder.CreateCall(

1133 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));

1134 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:

1135 return Builder.CreateCall(

1136 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));

1137 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:

1138 return Builder.CreateCall(

1139 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));

1140 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:

1141 return Builder.CreateCall(

1142 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));

1143 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:

1144 return Builder.CreateCall(

1145 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));

1146 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:

1147 return Builder.CreateCall(

1148 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));

1149 case NVPTX::BI__nvvm_is_explicit_cluster:

1150 return Builder.CreateCall(

1151 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));

1152 case NVPTX::BI__nvvm_isspacep_shared_cluster:

1153 return Builder.CreateCall(

1154 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),

1156 case NVPTX::BI__nvvm_mapa:

1157 return Builder.CreateCall(

1158 CGM.getIntrinsic(Intrinsic::nvvm_mapa),

1159 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

1160 case NVPTX::BI__nvvm_mapa_shared_cluster:

1161 return Builder.CreateCall(

1162 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),

1163 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

1164 case NVPTX::BI__nvvm_getctarank:

1165 return Builder.CreateCall(

1166 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),

1168 case NVPTX::BI__nvvm_getctarank_shared_cluster:

1169 return Builder.CreateCall(

1170 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),

1172 case NVPTX::BI__nvvm_barrier_cluster_arrive:

1173 return Builder.CreateCall(

1174 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));

1175 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:

1176 return Builder.CreateCall(

1177 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));

1178 case NVPTX::BI__nvvm_barrier_cluster_wait:

1179 return Builder.CreateCall(

1180 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));

1181 case NVPTX::BI__nvvm_fence_sc_cluster:

1182 return Builder.CreateCall(

1183 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));

1184 case NVPTX::BI__nvvm_bar_sync:

1185 return Builder.CreateCall(

1186 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),

1188 case NVPTX::BI__syncthreads:

1189 return Builder.CreateCall(

1190 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all),

1192 case NVPTX::BI__nvvm_barrier_sync:

1193 return Builder.CreateCall(

1194 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all),

1196 case NVPTX::BI__nvvm_barrier_sync_cnt:

1197 return Builder.CreateCall(

1198 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count),

1199 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

1200 default:

1201 return nullptr;

1202 }

1203}