LLVM: lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Source File


23#include "llvm/IR/IntrinsicsNVPTX.h"

30#include

31

32using namespace llvm;

33

34#define DEBUG_TYPE "nvptx-isel"

35#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"

36

37static cl::opt<bool>
38 EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
39 cl::desc("Enable reciprocal sqrt optimization"));

40

41

42

43

44static cl::opt<bool>
45 EnableMADWide("nvptx-mad-wide-opt", cl::init(false), cl::Hidden,
46 cl::desc("Enable MAD wide optimization"));

47

48

49

54

59

61

63

67

73

75NVPTXDAGToDAGISel::getDivF32Level(const SDNode *N) const {

76 return Subtarget->getTargetLowering()->getDivF32Level(*MF, *N);
77}

78

79bool NVPTXDAGToDAGISel::usePrecSqrtF32(const SDNode *N) const {

80 return Subtarget->getTargetLowering()->usePrecSqrtF32(N);
81}

82

83bool NVPTXDAGToDAGISel::useF32FTZ() const {

84 return Subtarget->getTargetLowering()->useF32FTZ(*MF);

85}

86

87bool NVPTXDAGToDAGISel::allowFMA() const {

88 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();

89 return TL->allowFMA(*MF, OptLevel);
90}

91

92bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }

93

94bool NVPTXDAGToDAGISel::doMADWideOpt() const { return EnableMADWide; }

95

96

97
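// Select() is the top-level instruction-selection entry point for this pass.
// Nodes that already carry a machine opcode are only re-tagged (NodeId = -1);
// a fixed set of opcodes is handled by the hand-written try*/Select* helpers
// below, and every helper that declines (returns false) lets the node fall
// through to the TableGen-generated matcher, SelectCode(N).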

98void NVPTXDAGToDAGISel::Select(SDNode *N) {

99

100 if (N->isMachineOpcode()) {

101 N->setNodeId(-1);

102 return;

103 }

104

105 switch (N->getOpcode()) {

106 case ISD::LOAD:

107 case ISD::ATOMIC_LOAD:

109 if (tryLoad(N))

110 return;

111 break;

112 case ISD::STORE:

113 case ISD::ATOMIC_STORE:

114 if (tryStore(N))

115 return;

116 break;

117 case ISD::ATOMIC_FENCE:

118 if (tryFence(N))

119 return;

120 break;

122 tryUNPACK_VECTOR(N);

123 return;

125 if (tryEXTRACT_VECTOR_ELEMENT(N))

126 return;

127 break;

129 SelectSETP_F16X2(N);

130 return;

132 SelectSETP_BF16X2(N);

133 return;

137 if (tryLoadVector(N))

138 return;

139 break;

142 if (tryLDU(N))

143 return;

144 break;

148 if (tryStoreVector(N))

149 return;

150 break;

152 if (tryIntrinsicChain(N))

153 return;

154 break;

156 if (tryIntrinsicVoid(N))

157 return;

158 break;

162

163 if (tryBFE(N))

164 return;

165 break;

166 case ISD::ADDRSPACECAST:

167 SelectAddrSpaceCast(N);

168 return;

170 if (N->getOperand(1).getValueType() == MVT::i128) {

171 SelectV2I64toI128(N);

172 return;

173 }

174 break;

175 }

177 if (N->getOperand(1).getValueType() == MVT::i128) {

178 SelectI128toV2I64(N);

179 return;

180 }

181 break;

182 }

185 selectAtomicSwap128(N);

186 return;

190 if (tryBF16ArithToFMA(N))

191 return;

192 break;

193 default:

194 break;

195 }

196 SelectCode(N);

197}

198
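// TCGEN05_LD_OPCODE expands to the packed or unpacked variant of a tcgen05.ld
// machine opcode for a given shape/count pair, depending on the local
// `enablePack` flag at the expansion site; getTcgen05LdOpcode below uses it to
// map each nvvm_tcgen05_ld_* intrinsic to its instruction.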

199#define TCGEN05_LD_OPCODE(SHAPE, NUM) \

200 (enablePack ? NVPTX::TCGEN05_LD_##SHAPE##_##NUM##_PACK \

201 : NVPTX::TCGEN05_LD_##SHAPE##_##NUM)

202

204 switch (IID) {

205 case Intrinsic::nvvm_tcgen05_ld_16x64b_x1:

207 case Intrinsic::nvvm_tcgen05_ld_16x64b_x2:

209 case Intrinsic::nvvm_tcgen05_ld_16x64b_x4:

211 case Intrinsic::nvvm_tcgen05_ld_16x64b_x8:

213 case Intrinsic::nvvm_tcgen05_ld_16x64b_x16:

215 case Intrinsic::nvvm_tcgen05_ld_16x64b_x32:

217 case Intrinsic::nvvm_tcgen05_ld_16x64b_x64:

219 case Intrinsic::nvvm_tcgen05_ld_16x64b_x128:

221 case Intrinsic::nvvm_tcgen05_ld_16x128b_x1:

223 case Intrinsic::nvvm_tcgen05_ld_16x128b_x2:

225 case Intrinsic::nvvm_tcgen05_ld_16x128b_x4:

227 case Intrinsic::nvvm_tcgen05_ld_16x128b_x8:

229 case Intrinsic::nvvm_tcgen05_ld_16x128b_x16:

231 case Intrinsic::nvvm_tcgen05_ld_16x128b_x32:

233 case Intrinsic::nvvm_tcgen05_ld_16x128b_x64:

235 case Intrinsic::nvvm_tcgen05_ld_16x256b_x1:

237 case Intrinsic::nvvm_tcgen05_ld_16x256b_x2:

239 case Intrinsic::nvvm_tcgen05_ld_16x256b_x4:

241 case Intrinsic::nvvm_tcgen05_ld_16x256b_x8:

243 case Intrinsic::nvvm_tcgen05_ld_16x256b_x16:

245 case Intrinsic::nvvm_tcgen05_ld_16x256b_x32:

247 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x1:

249 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x2:

251 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x4:

253 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x8:

255 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x16:

257 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x32:

259 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x64:

261 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x128:

263 case Intrinsic::nvvm_tcgen05_ld_32x32b_x1:

265 case Intrinsic::nvvm_tcgen05_ld_32x32b_x2:

267 case Intrinsic::nvvm_tcgen05_ld_32x32b_x4:

269 case Intrinsic::nvvm_tcgen05_ld_32x32b_x8:

271 case Intrinsic::nvvm_tcgen05_ld_32x32b_x16:

273 case Intrinsic::nvvm_tcgen05_ld_32x32b_x32:

275 case Intrinsic::nvvm_tcgen05_ld_32x32b_x64:

277 case Intrinsic::nvvm_tcgen05_ld_32x32b_x128:

279 }

281}

282
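// SelectTcgen05Ld materializes a tcgen05.ld machine node. When `hasOffset` is
// set, the intrinsic's offset argument is rebuilt as a target-constant operand
// and emitted between the address (operand 2) and the chain; otherwise only
// the address and the chain are passed through.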

283void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {

284 if (!Subtarget->hasTcgen05InstSupport())

286 "tcgen05.ld is not supported on this architecture variant");

287

288 SDLoc DL(N);

290

291 if (hasOffset) {

293 auto OffsetNode = CurDAG->getTargetConstant(

297 {N->getOperand(2), OffsetNode, N->getOperand(0)}));

298 } else {

302 {N->getOperand(2), N->getOperand(0)}));

303 }

304}

305

306bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {

307 unsigned IID = N->getConstantOperandVal(1);

308 switch (IID) {

309 default:

310 return false;

311 case Intrinsic::nvvm_ldu_global_f:

312 case Intrinsic::nvvm_ldu_global_i:

313 case Intrinsic::nvvm_ldu_global_p:

314 return tryLDU(N);

315

316 case Intrinsic::nvvm_tcgen05_ld_16x64b_x1:

317 case Intrinsic::nvvm_tcgen05_ld_16x64b_x2:

318 case Intrinsic::nvvm_tcgen05_ld_16x64b_x4:

319 case Intrinsic::nvvm_tcgen05_ld_16x64b_x8:

320 case Intrinsic::nvvm_tcgen05_ld_16x64b_x16:

321 case Intrinsic::nvvm_tcgen05_ld_16x64b_x32:

322 case Intrinsic::nvvm_tcgen05_ld_16x64b_x64:

323 case Intrinsic::nvvm_tcgen05_ld_16x64b_x128:

324 case Intrinsic::nvvm_tcgen05_ld_16x128b_x1:

325 case Intrinsic::nvvm_tcgen05_ld_16x128b_x2:

326 case Intrinsic::nvvm_tcgen05_ld_16x128b_x4:

327 case Intrinsic::nvvm_tcgen05_ld_16x128b_x16:

328 case Intrinsic::nvvm_tcgen05_ld_16x128b_x32:

329 case Intrinsic::nvvm_tcgen05_ld_16x128b_x64:

330 case Intrinsic::nvvm_tcgen05_ld_16x256b_x1:

331 case Intrinsic::nvvm_tcgen05_ld_16x128b_x8:

332 case Intrinsic::nvvm_tcgen05_ld_16x256b_x2:

333 case Intrinsic::nvvm_tcgen05_ld_16x256b_x4:

334 case Intrinsic::nvvm_tcgen05_ld_16x256b_x8:

335 case Intrinsic::nvvm_tcgen05_ld_16x256b_x16:

336 case Intrinsic::nvvm_tcgen05_ld_16x256b_x32:

337 case Intrinsic::nvvm_tcgen05_ld_32x32b_x1:

338 case Intrinsic::nvvm_tcgen05_ld_32x32b_x2:

339 case Intrinsic::nvvm_tcgen05_ld_32x32b_x4:

340 case Intrinsic::nvvm_tcgen05_ld_32x32b_x8:

341 case Intrinsic::nvvm_tcgen05_ld_32x32b_x16:

342 case Intrinsic::nvvm_tcgen05_ld_32x32b_x32:

343 case Intrinsic::nvvm_tcgen05_ld_32x32b_x64:

344 case Intrinsic::nvvm_tcgen05_ld_32x32b_x128: {

345 SelectTcgen05Ld(N);

346 return true;

347 }

348

349 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x1:

350 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x2:

351 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x4:

352 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x8:

353 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x16:

354 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x32:

355 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x64:

356 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x128: {

357 SelectTcgen05Ld(N, true);

358 return true;

359 }

360 }

361}

362

363

364
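// The lambda below maps an ISD condition code to the PTX comparison-mode
// encoding (EQ/GT/GE/LT/LE/NE, the NUM/NotANumber ordered-vs-unordered tests,
// and the unordered EQU/GTU/GEU/LTU/LEU/NEU variants); the result is wrapped
// in an i32 target constant used as the CmpMode operand of SETP.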

367 const unsigned PTXCmpMode = [](ISD::CondCode CC) {

368 switch (CC) {

369 default:

373 return CmpMode::EQ;

376 return CmpMode::GT;

379 return CmpMode::GE;

382 return CmpMode::LT;

385 return CmpMode::LE;

388 return CmpMode::NE;

390 return CmpMode::NUM;

392 return CmpMode::NotANumber;

394 return CmpMode::EQU;

396 return CmpMode::GTU;

398 return CmpMode::GEU;

400 return CmpMode::LTU;

402 return CmpMode::LEU;

404 return CmpMode::NEU;

405 }

407 return CurDAG->getTargetConstant(PTXCmpMode, SDLoc(), MVT::i32);

408}

409
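// SelectSETP_F16X2 / SelectSETP_BF16X2 lower a packed two-lane compare to the
// SETP_f16x2rr / SETP_bf16x2rr machine instructions, which produce two i1
// results (one per lane). The CmpMode constant from above and an i1 flag for
// flush-to-zero (useF32FTZ()) are appended as immediate operands.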

410bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {

412 SDLoc DL(N);

413 SDNode *SetP = CurDAG->getMachineNode(

414 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1,

415 {N->getOperand(0), N->getOperand(1), PTXCmpMode,

416 CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});

418 return true;

419}

420

421bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {

423 SDLoc DL(N);

424 SDNode *SetP = CurDAG->getMachineNode(

425 NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1,

426 {N->getOperand(0), N->getOperand(1), PTXCmpMode,

427 CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});

429 return true;

430}

431

432bool NVPTXDAGToDAGISel::tryUNPACK_VECTOR(SDNode *N) {

434 MVT EltVT = N->getSimpleValueType(0);

435

436 MachineSDNode *N2 =

437 CurDAG->getMachineNode(NVPTX::I64toV2I32, SDLoc(N), EltVT, EltVT, Vector);

438

440 return true;

441}

442

443

444
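// tryEXTRACT_VECTOR_ELEMENT: for a packed vector that fits in a 32- or 64-bit
// register, scan the users that extract a constant lane and, when every such
// extract targets lane 0 or lane 1, replace them all with the two results of a
// single split node (I32toV2I16 or I64toV2I32) so the vector is unpacked once.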

445bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {

447

448 MVT VT = Vector.getSimpleValueType();

450 return false;

451

452 unsigned Opcode;

454 Opcode = NVPTX::I32toV2I16;

456 Opcode = NVPTX::I64toV2I32;

457 else

459

460

462 for (auto *U : Vector.getNode()->users()) {

464 continue;

465 if (U->getOperand(0) != Vector)

466 continue;

467 if (const ConstantSDNode *IdxConst =

469 if (IdxConst->getZExtValue() == 0)

471 else if (IdxConst->getZExtValue() == 1)

473 else

475 }

476 }

477

478

479

481 return false;

482

483

484

486 SDNode *ScatterOp =

487 CurDAG->getMachineNode(Opcode, SDLoc(N), EltVT, EltVT, Vector);

488 for (auto *Node : E0)

490 for (auto *Node : E1)

492

493 return true;

494}

495
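// convertAS maps an LLVM address-space number onto the NVPTX::AddressSpace
// enumeration used by the load/store patterns; address spaces with no
// corresponding PTX state space fall through to the default case and yield
// std::nullopt.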

496static std::optional<NVPTX::AddressSpace> convertAS(unsigned AS) {

497 switch (AS) {

512 default:

513 return std::nullopt;

514 }

515}

516

518 return convertAS(N->getMemOperand()->getAddrSpace())

520}

521

523

526 auto Ordering = N->getMergedOrdering();

527 switch (Ordering) {

541 }

543}

544

546

547

550 return Scopes[N->getSyncScopeID()];

551}

552

553namespace {

554

555struct OperationOrderings {

556 NVPTX::Ordering InstructionOrdering, FenceOrdering;

557 OperationOrderings(NVPTX::Ordering IO = NVPTX::Ordering::NotAtomic,

558 NVPTX::Ordering FO = NVPTX::Ordering::NotAtomic)

559 : InstructionOrdering(IO), FenceOrdering(FO) {}

560};

561

562static OperationOrderings


657 }

658

659

660

664 !HasMemoryOrdering) {

666 formatv("PTX does not support \"atomic\" for orderings different than"

667 "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "

668 "is: \"{}\".",

670 }

671

672

673

674

675

676

677

678

679 bool AddrGenericOrGlobalOrShared =

684 if (!AddrGenericOrGlobalOrShared)

686

687 bool UseRelaxedMMIO =

689

690 switch (Ordering) {

695

696

698 if (N->isVolatile())

701 else

704

705

707 if (N->readMem())

709 formatv("PTX only supports Acquire Ordering on reads: {}",

710 N->getOperationName()));

713 if (N->writeMem())

715 formatv("PTX only supports Release Ordering on writes: {}",

716 N->getOperationName()));

720 formatv("NVPTX does not support AcquireRelease Ordering on "

721 "read-modify-write "

722 "yet and PTX does not support it on loads or stores: {}",

723 N->getOperationName()));

724 }

726

727

728

729

730

731

732

734 if (N->readMem())

736 else if (N->writeMem())

738 else

740 formatv("NVPTX does not support SequentiallyConsistent Ordering on "

741 "read-modify-writes yet: {}",

742 N->getOperationName()));

743 return OperationOrderings(InstrOrder,

745 }

746 }

748 formatv("NVPTX backend does not support AtomicOrdering \"{}\" yet.",

750}

751

752}

753

756 switch (O) {

759

762

763

764

771 auto S = Scopes[N->getSyncScopeID()];

772

773

777

778

780 Subtarget->failIfClustersUnsupported("cluster scope");

781

782

784 }

786}

787

790

791

793 N.isInvariant();

794}

795
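// getFenceOp picks the machine opcode for a fence with the given ordering and
// scope: on targets with PTX memory-ordering support it selects one of the
// atomic_thread_fence_<order>_<scope> instructions, otherwise it falls back to
// the legacy membar.{sys,cta,gl} forms; unsupported ordering/scope pairs are
// reported as fatal errors.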

799 T->failIfClustersUnsupported(".cluster scope fence");

800

801

802 if (T->hasSplitAcquireAndReleaseFences() &&

805

806 switch (O) {

808 switch (S) {

810 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_sys

811 : NVPTX::INT_MEMBAR_SYS;

813 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_cta

814 : NVPTX::INT_MEMBAR_CTA;

816 return NVPTX::atomic_thread_fence_acquire_cluster;

818 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu

819 : NVPTX::INT_MEMBAR_GL;

823 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",

824 ScopeToString(S)));

825 }

826 break;

828 switch (S) {

830 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_sys

831 : NVPTX::INT_MEMBAR_SYS;

833 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_cta

834 : NVPTX::INT_MEMBAR_CTA;

836 return NVPTX::atomic_thread_fence_release_cluster;

838 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_gpu

839 : NVPTX::INT_MEMBAR_GL;

843 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",

844 ScopeToString(S)));

845 }

846 break;

848 switch (S) {

850 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys

851 : NVPTX::INT_MEMBAR_SYS;

853 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta

854 : NVPTX::INT_MEMBAR_CTA;

856 return NVPTX::atomic_thread_fence_acq_rel_cluster;

858 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu

859 : NVPTX::INT_MEMBAR_GL;

863 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",

864 ScopeToString(S)));

865 }

866 break;

867 }

869 switch (S) {

871 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys

872 : NVPTX::INT_MEMBAR_SYS;

874 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta

875 : NVPTX::INT_MEMBAR_CTA;

877 return NVPTX::atomic_thread_fence_seq_cst_cluster;

879 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu

880 : NVPTX::INT_MEMBAR_GL;

884 ScopeToString(S)));

885 }

886 break;

887 }

893 formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",

894 OrderingToString(O), ScopeToString(S)));

895 }

897}

898

899

900

901
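// insertMemoryInstructionFence: getOperationOrderings splits an atomic
// operation's ordering into the ordering encoded on the memory instruction
// itself plus an optional separate fence ordering (used e.g. for sequentially
// consistent operations). This helper emits that fence on the chain when one
// is required and returns the instruction ordering and scope to encode on the
// load/store being selected.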

902std::pair<NVPTX::Ordering, NVPTX::Scope>

903NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,

905 auto [InstructionOrdering, FenceOrdering] =

907 auto Scope = getOperationScope(N, InstructionOrdering);

908

909

912 break;

915 Chain = SDValue(CurDAG->getMachineNode(Op, DL, MVT::Other, Chain), 0);

916 break;

917 }

918 default:

920 formatv("Unexpected fence ordering: \"{}\".",

922 }

923 return {InstructionOrdering, Scope};

924}

925
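// SelectAddrSpaceCast: a cast from a specific address space to generic is
// lowered to one of the cvta.* instructions, while generic-to-specific uses
// the cvta.to.* forms; casting between two non-generic spaces is rejected.
// When a 32-bit pointer is involved on a 64-bit target, CVT_u64_u32 /
// CVT_u32_u64 conversions are wrapped around the cvta to fix up the width.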

926void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {

927 SDValue Src = N->getOperand(0);

931 SDLoc DL(N);

932 assert(SrcAddrSpace != DstAddrSpace &&

933 "addrspacecast must be between different address spaces");

934

936

937

938 if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) {

941 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64,

942 Src, CvtNone);

944 }

945

946 unsigned Opc;

947 switch (SrcAddrSpace) {

950 Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;

951 break;

953 Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;

954 break;

956 if (!TM.is64Bit())

958 "Shared cluster address space is only supported in 64-bit mode");

959 Opc = NVPTX::cvta_shared_cluster_64;

960 break;

962 Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;

963 break;

965 Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;

966 break;

968 Opc = TM.is64Bit() ? NVPTX::cvta_param_64 : NVPTX::cvta_param;

969 break;

970 }

972 return;

973 } else {

974

975 if (SrcAddrSpace != 0)

976 report_fatal_error("Cannot cast between two non-generic address spaces");

977 unsigned Opc;

978 switch (DstAddrSpace) {

981 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;

982 break;

984 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;

985 break;

987 if (!TM.is64Bit())

989 "Shared cluster address space is only supported in 64-bit mode");

990 Opc = NVPTX::cvta_to_shared_cluster_64;

991 break;

993 Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;

994 break;

996 Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;

997 break;

999 Opc = TM.is64Bit() ? NVPTX::cvta_to_param_64 : NVPTX::cvta_to_param;

1000 break;

1001 }

1002

1003 SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src);

1004 if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) {

1007 CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32,

1008 SDValue(CVTA, 0), CvtNone);

1009 }

1010

1012 return;

1013 }

1014}

1015

1016

1017
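// pickOpcodeForVT buckets a value type by the width of the register it
// occupies, since PTX loads/stores only distinguish 16-, 32- and 64-bit
// accesses here: f16/bf16/i16 use the i16 opcode, every 32-bit scalar or
// packed type (v2f16, v2bf16, v2i16, v4i8, i32, f32) uses the i32 opcode, and
// the 64-bit types use the i64 opcode. Unsupported types yield std::nullopt so
// callers can bail out of custom selection.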

1018static std::optional<unsigned>
1019pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
1020 std::optional<unsigned> Opcode_i32,
1021 std::optional<unsigned> Opcode_i64) {

1022 switch (VT) {

1023 case MVT::f16:

1024 case MVT::i16:

1025 case MVT::bf16:

1026 return Opcode_i16;

1027 case MVT::v2f16:

1028 case MVT::v2bf16:

1029 case MVT::v2i16:

1030 case MVT::v4i8:

1031 case MVT::i32:

1032 case MVT::f32:

1033 return Opcode_i32;

1034 case MVT::v2f32:

1035 case MVT::v2i32:

1036 case MVT::i64:

1037 case MVT::f64:

1038 return Opcode_i64;

1039 default:

1040 return std::nullopt;

1041 }

1042}

1043

1044static bool isAddLike(const SDValue V) {
1045 return V.getOpcode() == ISD::ADD ||

1046 (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());

1047}

1048

1049static SDValue stripAssertAlign(SDValue N) {
1050 while (N.getOpcode() == ISD::AssertAlign)
1051 N = N.getOperand(0);

1052 return N;

1053}

1054

1055

1056

1061 GA->getValueType(0), GA->getOffset(),

1062 GA->getTargetFlags());

1065 ES->getTargetFlags());

1068

1069 return N;

1070}

1071
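// accumulateOffset walks an add-like address expression (see isAddLike above),
// folding constant right-hand operands into a single accumulated offset as
// long as the running sum still fits a signed 32-bit immediate, and returns
// that offset as an i32 target constant. Together with selectBaseADDR this
// forms the base+offset addressing mode produced by selectADDR.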

1072static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
1074 APInt AccumulatedOffset(64u, 0);

1077 if (!CN)

1078 break;

1079

1080 const APInt CI = CN->getAPIntValue().sext(64);

1081 if (!(CI + AccumulatedOffset).isSignedIntN(32))

1082 break;

1083

1084 AccumulatedOffset += CI;

1086 }

1088 MVT::i32);

1089}

1090

1096

1097

1098

1099

1100

1101

1102

1103

1107 return true;

1108}

1109
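// tryLoad handles scalar LOAD and ATOMIC_LOAD nodes (indexed loads are left to
// the generic path). Qualifying loads from the global space are diverted to
// tryLDG; otherwise a fence is inserted when the ordering requires one and an
// LD_i16/LD_i32/LD_i64 machine node is built whose immediate operands encode
// ordering, scope, address space, the source type and width, and a used-bytes
// mask.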

1110bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {

1112 assert(LD->readMem() && "Expected load");

1113

1114

1116 if (PlainLoad && PlainLoad->isIndexed())

1117 return false;

1118

1119

1120 const auto CodeAddrSpace = getAddrSpace(LD);

1122 return tryLDG(LD);

1123

1124 SDLoc DL(LD);

1126 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);

1127

1128 const unsigned FromTypeWidth = LD->getMemoryVT().getSizeInBits();

1129

1130

1135

1136 uint32_t UsedBytesMask;

1137 switch (N->getOpcode()) {

1138 case ISD::LOAD:

1139 case ISD::ATOMIC_LOAD:

1140 UsedBytesMask = UINT32_MAX;

1141 break;

1143 UsedBytesMask = N->getConstantOperandVal(3);

1144 break;

1145 default:

1147 }

1148

1150 FromTypeWidth <= 128 && "Invalid width for load");

1151

1152

1155 getI32Imm(Scope, DL),

1156 getI32Imm(CodeAddrSpace, DL),

1157 getI32Imm(FromType, DL),

1158 getI32Imm(FromTypeWidth, DL),

1159 getI32Imm(UsedBytesMask, DL),

1162 Chain};

1163

1165 const std::optional<unsigned> Opcode =

1166 pickOpcodeForVT(TargetVT, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64);

1167 if (!Opcode)

1168 return false;

1169

1170 SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);

1171 if (!NVPTXLD)

1172 return false;

1173

1174 MachineMemOperand *MemRef = LD->getMemOperand();

1176

1178 return true;

1179}

1180

1182 switch (N->getOpcode()) {

1184 return 2;

1186 return 4;

1188 return 8;

1189 default:

1191 }

1192}

1193

1194bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

1196

1197

1198 const auto CodeAddrSpace = getAddrSpace(LD);

1200 return tryLDG(LD);

1201

1202 const MVT EltVT = LD->getSimpleValueType(0);

1203 SDLoc DL(LD);

1205 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);

1206

1207

1208

1209

1210

1211

1212

1213

1214

1215 const unsigned ExtensionType = N->getConstantOperandVal(4);

1218 : NVPTX::PTXLdStInstCode::Untyped;

1219

1221 const uint32_t UsedBytesMask = N->getConstantOperandVal(3);

1222

1224

1227 getI32Imm(Scope, DL),

1228 getI32Imm(CodeAddrSpace, DL),

1229 getI32Imm(FromType, DL),

1230 getI32Imm(FromTypeWidth, DL),

1231 getI32Imm(UsedBytesMask, DL),

1234 Chain};

1235

1236 std::optional<unsigned> Opcode;

1237 switch (N->getOpcode()) {

1238 default:

1242 NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2);

1243 break;

1246 NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4);

1247 break;

1250 NVPTX::LDV_i32_v8, {});

1251 break;

1252 }

1253 if (!Opcode)

1254 return false;

1255

1256 SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);

1257

1258 MachineMemOperand *MemRef = LD->getMemOperand();

1260

1262 return true;

1263}

1264

1265bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {

1266 SDLoc DL(LD);

1267

1268 unsigned ExtensionType;

1269 uint32_t UsedBytesMask;

1271 ExtensionType = Load->getExtensionType();

1272 UsedBytesMask = UINT32_MAX;

1273 } else {

1274 ExtensionType = LD->getConstantOperandVal(4);

1275 UsedBytesMask = LD->getConstantOperandVal(3);

1276 }

1279 : NVPTX::PTXLdStInstCode::Untyped;

1280

1282

1283 assert(!(LD->getSimpleValueType(0).isVector() &&

1285

1288 getI32Imm(FromTypeWidth, DL),

1289 getI32Imm(UsedBytesMask, DL),

1292 LD->getChain()};

1293

1295 std::optional<unsigned> Opcode;

1296 switch (LD->getOpcode()) {

1297 default:

1299 case ISD::LOAD:

1300 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_GLOBAL_NC_i16,

1301 NVPTX::LD_GLOBAL_NC_i32, NVPTX::LD_GLOBAL_NC_i64);

1302 break;

1304 Opcode = pickOpcodeForVT(TargetVT, std::nullopt, NVPTX::LD_GLOBAL_NC_i32,

1305 NVPTX::LD_GLOBAL_NC_i64);

1306 break;

1308 Opcode =

1310 NVPTX::LD_GLOBAL_NC_v2i32, NVPTX::LD_GLOBAL_NC_v2i64);

1311 break;

1313 Opcode =

1315 NVPTX::LD_GLOBAL_NC_v4i32, NVPTX::LD_GLOBAL_NC_v4i64);

1316 break;

1319 NVPTX::LD_GLOBAL_NC_v8i32, {});

1320 break;

1321 }

1322 if (!Opcode)

1323 return false;

1324

1325 SDNode *NVPTXLDG = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);

1326

1328 return true;

1329}

1330

1334 auto ElementBitWidth = TotalWidth / NumElts;

1336 ElementBitWidth <= 128 && TotalWidth <= 256 &&

1337 "Invalid width for load");

1338 return ElementBitWidth;

1339}

1340

1341bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {

1343

1347

1348

1349

1352

1355

1356 std::optional<unsigned> Opcode;

1357 switch (N->getOpcode()) {

1358 default:

1361 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_i16,

1362 NVPTX::LDU_GLOBAL_i32, NVPTX::LDU_GLOBAL_i64);

1363 break;

1365 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_v2i16,

1366 NVPTX::LDU_GLOBAL_v2i32, NVPTX::LDU_GLOBAL_v2i64);

1367 break;

1369 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_v4i16,

1370 NVPTX::LDU_GLOBAL_v4i32, {});

1371 break;

1372 }

1373 if (!Opcode)

1374 return false;

1375

1376 SDNode *NVPTXLDU = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);

1377

1379 return true;

1380}

1381
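// tryStore mirrors tryLoad for STORE and ATOMIC_STORE nodes: after the fence
// insertion, the value and address are packed with immediates for ordering,
// scope, address space and store width, and one of the scalar ST_* opcodes is
// selected via pickOpcodeForVT.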

1382bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {

1384 assert(ST->writeMem() && "Expected store");

1387 assert((PlainStore || AtomicStore) && "Expected store");

1388

1389

1390 if (PlainStore && PlainStore->isIndexed())

1391 return false;

1392

1393

1394 const auto CodeAddrSpace = getAddrSpace(ST);

1395

1396 SDLoc DL(ST);

1398 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);

1399

1400

1401 const unsigned ToTypeWidth = ST->getMemoryVT().getSizeInBits();

1402

1403

1405

1406 assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&

1407 "Invalid width for store");

1408

1411 getI32Imm(Ordering, DL),

1412 getI32Imm(Scope, DL),

1413 getI32Imm(CodeAddrSpace, DL),

1414 getI32Imm(ToTypeWidth, DL),

1417 Chain};

1418

1419 const std::optional<unsigned> Opcode =

1421 NVPTX::ST_i32, NVPTX::ST_i64);

1422 if (!Opcode)

1423 return false;

1424

1425 SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);

1426

1427 if (!NVPTXST)

1428 return false;

1429

1430 MachineMemOperand *MemRef = ST->getMemOperand();

1433 return true;

1434}

1435

1436bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {

1438 const unsigned TotalWidth = ST->getMemoryVT().getSizeInBits();

1439

1440

1441 const auto CodeAddrSpace = getAddrSpace(ST);

1443 report_fatal_error("Cannot store to pointer that points to constant "

1444 "memory space");

1445 }

1446

1447 SDLoc DL(ST);

1449 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);

1450

1452

1454 for (auto &V : ST->ops().slice(1, NumElts))

1455 Ops.push_back(selectPossiblyImm(V));

1457 const unsigned ToTypeWidth = TotalWidth / NumElts;

1458

1459 assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&

1460 TotalWidth <= 256 && "Invalid width for store");

1461

1463 Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),

1464 getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,

1466

1468 ST->getOperand(1).getSimpleValueType().SimpleTy;

1469 std::optional<unsigned> Opcode;

1470 switch (ST->getOpcode()) {

1471 default:

1472 return false;

1474 Opcode = pickOpcodeForVT(EltVT, NVPTX::STV_i16_v2, NVPTX::STV_i32_v2,

1475 NVPTX::STV_i64_v2);

1476 break;

1478 Opcode = pickOpcodeForVT(EltVT, NVPTX::STV_i16_v4, NVPTX::STV_i32_v4,

1479 NVPTX::STV_i64_v4);

1480 break;

1482 Opcode = pickOpcodeForVT(EltVT, {}, NVPTX::STV_i32_v8,

1483 {});

1484 break;

1485 }

1486

1487 if (!Opcode)

1488 return false;

1489

1490 SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);

1491

1492 MachineMemOperand *MemRef = ST->getMemOperand();

1494

1496 return true;

1497}

1498

1499

1500
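// tryBFE recognizes bit-field extract patterns and selects PTX bfe
// instructions: an AND with a trailing-ones mask, a right shift of a masked
// value, or an SHL followed by SRL/SRA is turned into BFE_U32rii/BFE_S32rii
// (or the 64-bit forms) with explicit start and length immediates. Each early
// `return false` keeps the node on the default selection path when the
// pattern does not match profitably.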

1501bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {

1502 SDLoc DL(N);

1508 bool IsSigned = false;

1509

1510 if (N->getOpcode() == ISD::AND) {

1511

1512

1515 }

1516

1518 if (!Mask) {

1519

1520 return false;

1521 }

1522

1523

1524 uint64_t MaskVal = Mask->getZExtValue();

1526

1527

1528

1529 return false;

1530 }

1531

1532

1533 int64_t NumBits = countr_one(MaskVal);

1534 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

1535

1537

1538 Val = LHS.getNode()->getOperand(0);

1539 Start = LHS.getNode()->getOperand(1);

1541 if (StartConst) {

1542 uint64_t StartVal = StartConst->getZExtValue();

1543

1544

1545 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;

1546 if (NumBits > GoodBits) {

1547

1548

1549

1550 return false;

1551 }

1552 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);

1553 } else {

1554

1555

1556

1557

1558 return false;

1559 }

1560 } else {

1561

1562

1563

1564 return false;

1565 }

1566 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {

1569 if (!ShiftCnst) {

1570

1571 return false;

1572 }

1573

1574 uint64_t ShiftAmt = ShiftCnst->getZExtValue();

1575

1576 SDValue AndLHS = LHS->getOperand(0);

1577 SDValue AndRHS = LHS->getOperand(1);

1578

1579

1582 }

1583

1585 if (!MaskCnst) {

1586

1587 return false;

1588 }

1589

1590 uint64_t MaskVal = MaskCnst->getZExtValue();

1591 uint64_t NumZeros;

1592 uint64_t NumBits;

1594 NumZeros = 0;

1595

1596

1601

1602

1603

1604 NumBits = NumZeros + NumOnes - ShiftAmt;

1605 } else {

1606

1607 return false;

1608 }

1609

1610 if (ShiftAmt < NumZeros) {

1611

1612

1613 return false;

1614 }

1615

1616 Val = AndLHS;

1617 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);

1618 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);

1619

1620

1621

1622

1623

1626 } else if (LHS->getOpcode() == ISD::SHL) {

1627

1628

1629

1630

1631

1632

1633

1634 Val = LHS->getOperand(0);

1635

1636 SDValue ShlRHS = LHS->getOperand(1);

1638 if (!ShlCnst) {

1639

1640 return false;

1641 }

1642 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

1643

1646 if (!ShrCnst) {

1647

1648 return false;

1649 }

1650 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

1651

1652

1653 if (OuterShiftAmt < InnerShiftAmt) {

1654 return false;

1655 }

1656

1657

1658

1659

1661 return false;

1662 }

1663

1664 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,

1665 MVT::i32);

1667 DL, MVT::i32);

1668

1669 if (N->getOpcode() == ISD::SRA) {

1670

1671

1672 IsSigned = true;

1673 }

1674 } else {

1675

1676 return false;

1677 }

1678 } else {

1679

1680 return false;

1681 }

1682

1683

1684 unsigned Opc;

1685

1686

1688 if (IsSigned) {

1689 Opc = NVPTX::BFE_S32rii;

1690 } else {

1691 Opc = NVPTX::BFE_U32rii;

1692 }

1693 } else if (Val.getValueType() == MVT::i64) {

1694 if (IsSigned) {

1695 Opc = NVPTX::BFE_S64rii;

1696 } else {

1697 Opc = NVPTX::BFE_U64rii;

1698 }

1699 } else {

1700

1701 return false;

1702 }

1703

1706 };

1707

1709 return true;

1710}

1711

1712
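// tryBF16ArithToFMA: when the subtarget lacks native bf16 add/sub/mul support,
// the operation is re-expressed with the bf16 FMA instruction, e.g.
//   a + b  ->  fma(a,  1.0, b)
//   a - b  ->  fma(b, -1.0, a)
//   a * b  ->  fma(a,  b, -0.0)
// GetConstant builds the bf16 (or packed bf16x2) constant operand via a MOV of
// the immediate bit pattern.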

1713bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {

1714 EVT VT = SDValue(N, 0).getValueType();

1716 return false;

1717

1718 const NVPTXSubtarget *STI = TM.getSubtargetImpl();

1720 return false;

1721

1722 const bool IsVec = VT.isVector();

1724 SDLoc DL(N);

1725 SDValue N0 = N->getOperand(0);

1726 SDValue N1 = N->getOperand(1);

1728 auto GetConstant = [&](float Value) -> SDValue {

1729

1731 bool LosesInfo;

1734 if (IsVec) {

1735 auto API = APF.bitcastToAPInt();

1736 API = API.concat(API);

1737 auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);

1738 return SDValue(CurDAG->getMachineNode(NVPTX::MOV_B32_i, DL, VT, Const),

1739 0);

1740 }

1741 auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);

1742 return SDValue(CurDAG->getMachineNode(NVPTX::MOV_BF16_i, DL, VT, Const), 0);

1743 };

1744

1745 switch (N->getOpcode()) {

1747

1748 Operands = {N0, GetConstant(1.0), N1};

1749 break;

1751

1752 Operands = {N1, GetConstant(-1.0), N0};

1753 break;

1755

1756

1757 Operands = {N0, N1, GetConstant(-0.0)};

1758 break;

1759 default:

1761 };

1762

1763 int Opcode = IsVec ? NVPTX::FMA_BF16x2rrr : NVPTX::FMA_BF16rrr;

1764 MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands);

1766 return true;

1767}

1768

1769SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {

1770 if (V.getOpcode() == ISD::BITCAST)

1771 V = V.getOperand(0);

1772

1774 return CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(V),

1775 V.getValueType());

1777 return CurDAG->getTargetConstantFP(CN->getValueAPF(), SDLoc(V),

1778 V.getValueType());

1779 return V;

1780}

1781

1782

1783

1786 std::vector<SDValue> &OutOps) {

1787 switch (ConstraintID) {

1788 default:

1789 return true;

1792 OutOps.push_back(Base);

1793 OutOps.push_back(Offset);

1794 return false;

1795 }

1796 }

1797 return true;

1798}

1799

1800void NVPTXDAGToDAGISel::SelectV2I64toI128(SDNode *N) {

1801

1802

1803

1804

1805

1806

1807

1808

1809

1810 SDValue Dst = N->getOperand(1);

1813

1817

1819 NewOps[0] = N->getOperand(0);

1820 NewOps[1] = Dst;

1821 NewOps[2] = SDValue(Mov, 0);

1822 if (N->getNumOperands() == 5)

1823 NewOps[3] = N->getOperand(4);

1825

1827}

1828

1829void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {

1830

1831

1832

1833

1834

1835

1836

1837

1838

1839 SDValue Ch = N->getOperand(0);

1840 SDValue Src = N->getOperand(1);

1841 SDValue Glue = N->getOperand(2);

1842 SDLoc DL(N);

1843

1844

1845

1846 SDNode *Mov = CurDAG->getMachineNode(

1847 NVPTX::I128toV2I64, DL,

1849 {Src, Ch, Glue});

1850

1852}

1853

1854bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {

1855 SDLoc DL(N);

1856 assert(N->getOpcode() == ISD::ATOMIC_FENCE);

1857 unsigned int FenceOp =

1859 Scopes[N->getConstantOperandVal(2)], Subtarget);

1861 SDNode *FenceNode = CurDAG->getMachineNode(FenceOp, DL, MVT::Other, Chain);

1863 return true;

1864}

1865

1873

1875 if (Scopes.empty())

1876 llvm_unreachable("NVPTX Scopes must be initialized before calling "

1877 "NVPTXScopes::operator[]");

1878

1879 auto S = Scopes.find(ID);

1880 if (S == Scopes.end()) {

1881 auto scopeName = Context->getSyncScopeName(ID);

1882 assert(scopeName.has_value() && "Scope name must exist.");

1883

1884

1886 for (const auto &Entry : Scopes) {

1887 if (auto name = Context->getSyncScopeName(Entry.first))

1888 supportedScopes.push_back(name->empty() ? "" : *name);

1889 }

1890

1892 formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n"

1893 "Supported syncscopes are: {2}.",

1894 scopeName.value(), int(ID),

1896 }

1897 return S->second;

1898}

1899

1901

1902#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, is_s32, suffix) \

1903 (is_s32 \

1904 ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix \

1905 : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix)

1906

1907#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(dim, mode, is_ch, is_s32) \

1908 (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, _CH)) \

1909 : (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, )))

1910

1912 bool IsShared32,

1913 bool IsCacheHint,

1914 bool IsIm2Col) {

1915 if (IsIm2Col) {

1916 switch (Dim) {

1917 case 3:

1919 IsShared32);

1920 case 4:

1922 IsShared32);

1923 case 5:

1925 IsShared32);

1926 default:

1928 "GetCpAsyncBulkTensorS2GReductionOpcode.");

1929 }

1930 } else {

1931 switch (Dim) {

1932 case 1:

1934 IsShared32);

1935 case 2:

1937 IsShared32);

1938 case 3:

1940 IsShared32);

1941 case 4:

1943 IsShared32);

1944 case 5:

1946 IsShared32);

1947 default:

1949 "GetCpAsyncBulkTensorS2GReductionOpcode.");

1950 }

1951 }

1952}

1953

1954void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,

1955 unsigned RedOp,

1956 bool IsIm2Col) {

1957

1958

1959

1960

1961 size_t NumOps = N->getNumOperands();

1962 size_t NumDims = NumOps - 6;

1963 bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;

1964 size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);

1965

1966 SDLoc DL(N);

1968 Ops.push_back(getI32Imm(RedOp, DL));

1969 Ops.push_back(N->getOperand(0));

1970

1971 bool IsShared32 =

1974 NumDims, IsShared32, IsCacheHint, IsIm2Col);

1976}

1977

1978#define TCGEN05_ST_OPCODE(SHAPE, NUM) \

1979 (enableUnpack ? NVPTX::TCGEN05_ST_##SHAPE##_##NUM##_UNPACK \

1980 : NVPTX::TCGEN05_ST_##SHAPE##_##NUM)

1981

1983 switch (IID) {

1984 case Intrinsic::nvvm_tcgen05_st_16x64b_x1:

1986 case Intrinsic::nvvm_tcgen05_st_16x64b_x2:

1988 case Intrinsic::nvvm_tcgen05_st_16x64b_x4:

1990 case Intrinsic::nvvm_tcgen05_st_16x64b_x8:

1992 case Intrinsic::nvvm_tcgen05_st_16x64b_x16:

1994 case Intrinsic::nvvm_tcgen05_st_16x64b_x32:

1996 case Intrinsic::nvvm_tcgen05_st_16x64b_x64:

1998 case Intrinsic::nvvm_tcgen05_st_16x64b_x128:

2000 case Intrinsic::nvvm_tcgen05_st_16x128b_x1:

2002 case Intrinsic::nvvm_tcgen05_st_16x128b_x2:

2004 case Intrinsic::nvvm_tcgen05_st_16x128b_x4:

2006 case Intrinsic::nvvm_tcgen05_st_16x128b_x8:

2008 case Intrinsic::nvvm_tcgen05_st_16x128b_x16:

2010 case Intrinsic::nvvm_tcgen05_st_16x128b_x32:

2012 case Intrinsic::nvvm_tcgen05_st_16x128b_x64:

2014 case Intrinsic::nvvm_tcgen05_st_16x256b_x1:

2016 case Intrinsic::nvvm_tcgen05_st_16x256b_x2:

2018 case Intrinsic::nvvm_tcgen05_st_16x256b_x4:

2020 case Intrinsic::nvvm_tcgen05_st_16x256b_x8:

2022 case Intrinsic::nvvm_tcgen05_st_16x256b_x16:

2024 case Intrinsic::nvvm_tcgen05_st_16x256b_x32:

2026 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x1:

2028 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x2:

2030 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x4:

2032 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x8:

2034 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x16:

2036 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x32:

2038 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x64:

2040 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x128:

2042 case Intrinsic::nvvm_tcgen05_st_32x32b_x1:

2044 case Intrinsic::nvvm_tcgen05_st_32x32b_x2:

2046 case Intrinsic::nvvm_tcgen05_st_32x32b_x4:

2048 case Intrinsic::nvvm_tcgen05_st_32x32b_x8:

2050 case Intrinsic::nvvm_tcgen05_st_32x32b_x16:

2052 case Intrinsic::nvvm_tcgen05_st_32x32b_x32:

2054 case Intrinsic::nvvm_tcgen05_st_32x32b_x64:

2056 case Intrinsic::nvvm_tcgen05_st_32x32b_x128:

2058 }

2060}

2061

2062void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) {

2063 if (!Subtarget->hasTcgen05InstSupport())

2065 "tcgen05.st is not supported on this architecture variant");

2066

2067 SDLoc DL(N);

2069

2071 N->getOperand(2)

2072 };

2073

2074 if (hasOffset)

2077 MVT::i32));

2078

2079 for (unsigned I = hasOffset ? 4 : 3; I < (N->getNumOperands() - 1); I++)

2081

2082 bool enableUnpack =

2084 ->getZExtValue();

2085

2086 Operands.push_back(N->getOperand(0));

2088 DL, N->getVTList(), Operands));

2089}

2090

2091bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {

2092 unsigned IID = N->getConstantOperandVal(1);

2094 auto CastTy = [](TMARedTy Op) { return static_cast<unsigned>(Op); };

2095 switch (IID) {

2096 default:

2097 return false;

2098 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:

2099 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:

2100 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:

2101 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d:

2102 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d:

2103 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD));

2104 return true;

2105 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d:

2106 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d:

2107 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d:

2108 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD),

2109 true);

2110 return true;

2111 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d:

2112 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d:

2113 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d:

2114 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d:

2115 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d:

2116 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN));

2117 return true;

2118 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d:

2119 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d:

2120 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d:

2121 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN),

2122 true);

2123 return true;

2124 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d:

2125 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d:

2126 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d:

2127 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d:

2128 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d:

2129 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX));

2130 return true;

2131 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d:

2132 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d:

2133 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d:

2134 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX),

2135 true);

2136 return true;

2137 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d:

2138 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d:

2139 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d:

2140 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d:

2141 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d:

2142 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC));

2143 return true;

2144 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d:

2145 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d:

2146 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d:

2147 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC),

2148 true);

2149 return true;

2150 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d:

2151 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d:

2152 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d:

2153 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d:

2154 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d:

2155 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC));

2156 return true;

2157 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d:

2158 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d:

2159 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d:

2160 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC),

2161 true);

2162 return true;

2163 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d:

2164 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d:

2165 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d:

2166 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d:

2167 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d:

2168 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND));

2169 return true;

2170 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d:

2171 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d:

2172 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d:

2173 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND),

2174 true);

2175 return true;

2176 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d:

2177 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d:

2178 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d:

2179 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d:

2180 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d:

2181 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR));

2182 return true;

2183 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d:

2184 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d:

2185 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d:

2186 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR),

2187 true);

2188 return true;

2189 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d:

2190 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d:

2191 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d:

2192 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d:

2193 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d:

2194 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR));

2195 return true;

2196 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d:

2197 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d:

2198 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d:

2199 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR),

2200 true);

2201 return true;

2202

2203 case Intrinsic::nvvm_tcgen05_st_16x64b_x1:

2204 case Intrinsic::nvvm_tcgen05_st_16x64b_x2:

2205 case Intrinsic::nvvm_tcgen05_st_16x64b_x4:

2206 case Intrinsic::nvvm_tcgen05_st_16x64b_x8:

2207 case Intrinsic::nvvm_tcgen05_st_16x64b_x16:

2208 case Intrinsic::nvvm_tcgen05_st_16x64b_x32:

2209 case Intrinsic::nvvm_tcgen05_st_16x64b_x64:

2210 case Intrinsic::nvvm_tcgen05_st_16x64b_x128:

2211 case Intrinsic::nvvm_tcgen05_st_32x32b_x1:

2212 case Intrinsic::nvvm_tcgen05_st_32x32b_x2:

2213 case Intrinsic::nvvm_tcgen05_st_32x32b_x4:

2214 case Intrinsic::nvvm_tcgen05_st_32x32b_x8:

2215 case Intrinsic::nvvm_tcgen05_st_32x32b_x16:

2216 case Intrinsic::nvvm_tcgen05_st_32x32b_x32:

2217 case Intrinsic::nvvm_tcgen05_st_32x32b_x64:

2218 case Intrinsic::nvvm_tcgen05_st_32x32b_x128:

2219 case Intrinsic::nvvm_tcgen05_st_16x128b_x1:

2220 case Intrinsic::nvvm_tcgen05_st_16x128b_x2:

2221 case Intrinsic::nvvm_tcgen05_st_16x128b_x4:

2222 case Intrinsic::nvvm_tcgen05_st_16x128b_x8:

2223 case Intrinsic::nvvm_tcgen05_st_16x128b_x16:

2224 case Intrinsic::nvvm_tcgen05_st_16x128b_x32:

2225 case Intrinsic::nvvm_tcgen05_st_16x128b_x64:

2226 case Intrinsic::nvvm_tcgen05_st_16x256b_x1:

2227 case Intrinsic::nvvm_tcgen05_st_16x256b_x2:

2228 case Intrinsic::nvvm_tcgen05_st_16x256b_x4:

2229 case Intrinsic::nvvm_tcgen05_st_16x256b_x8:

2230 case Intrinsic::nvvm_tcgen05_st_16x256b_x16:

2231 case Intrinsic::nvvm_tcgen05_st_16x256b_x32: {

2232 SelectTcgen05St(N);

2233 return true;

2234 }

2235

2236 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x1:

2237 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x2:

2238 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x4:

2239 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x8:

2240 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x16:

2241 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x32:

2242 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x64:

2243 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x128: {

2244 SelectTcgen05St(N, true);

2245 return true;

2246 }

2247 }

2248}

2249
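// selectAtomicSwap128 lowers the 128-bit atomic exchange and compare-exchange
// nodes: the value operands are forwarded, ordering and scope immediates are
// appended, and either ATOM_EXCH_B128 or ATOM_CAS_B128 is emitted depending on
// the incoming opcode.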

2250void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {

2252 SDLoc dl(N);

2253

2257 Ops.append(N->op_begin() + 2, N->op_end());

2258 Ops.append({

2259 getI32Imm(getMemOrder(AN), dl),

2260 getI32Imm(getAtomicScope(AN), dl),

2262 Chain,

2263 });

2264

2268 ? NVPTX::ATOM_EXCH_B128

2269 : NVPTX::ATOM_CAS_B128;

2270

2271 auto *ATOM = CurDAG->getMachineNode(Opcode, dl, N->getVTList(), Ops);

2273

2275}
