LLVM: lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Source File
23#include "llvm/IR/IntrinsicsNVPTX.h"
30#include
31
32using namespace llvm;
33
34#define DEBUG_TYPE "nvptx-isel"
35#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
36
37static cl::opt<bool>
38    EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
39                   cl::desc("Enable reciprocal sqrt optimization"));
40
41
42
43
44static cl::opt<bool>
45    EnableMADWide("nvptx-mad-wide-opt", cl::init(false), cl::Hidden,
46                  cl::desc("Enable MAD wide optimization"));
47
48
49
54
59
61
63
67
73
75NVPTXDAGToDAGISel::getDivF32Level(const SDNode *N) const {
77}
78
79bool NVPTXDAGToDAGISel::usePrecSqrtF32(const SDNode *N) const {
81}
82
83bool NVPTXDAGToDAGISel::useF32FTZ() const {
84 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
85}
86
87bool NVPTXDAGToDAGISel::allowFMA() const {
88 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
90}
91
92bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
93
94bool NVPTXDAGToDAGISel::doMADWideOpt() const { return EnableMADWide; }
95
96
97
98void NVPTXDAGToDAGISel::Select(SDNode *N) {
99
100 if (N->isMachineOpcode()) {
101 N->setNodeId(-1);
102 return;
103 }
104
105 switch (N->getOpcode()) {
106 case ISD::LOAD:
107 case ISD::ATOMIC_LOAD:
109 if (tryLoad(N))
110 return;
111 break;
112 case ISD::STORE:
113 case ISD::ATOMIC_STORE:
114 if (tryStore(N))
115 return;
116 break;
117 case ISD::ATOMIC_FENCE:
118 if (tryFence(N))
119 return;
120 break;
122 tryUNPACK_VECTOR(N);
123 return;
125 if (tryEXTRACT_VECTOR_ELEMENT(N))
126 return;
127 break;
129 SelectSETP_F16X2(N);
130 return;
132 SelectSETP_BF16X2(N);
133 return;
137 if (tryLoadVector(N))
138 return;
139 break;
142 if (tryLDU(N))
143 return;
144 break;
148 if (tryStoreVector(N))
149 return;
150 break;
152 if (tryIntrinsicChain(N))
153 return;
154 break;
156 if (tryIntrinsicVoid(N))
157 return;
158 break;
162
163 if (tryBFE(N))
164 return;
165 break;
166 case ISD::ADDRSPACECAST:
167 SelectAddrSpaceCast(N);
168 return;
170 if (N->getOperand(1).getValueType() == MVT::i128) {
171 SelectV2I64toI128(N);
172 return;
173 }
174 break;
175 }
177 if (N->getOperand(1).getValueType() == MVT::i128) {
178 SelectI128toV2I64(N);
179 return;
180 }
181 break;
182 }
185 selectAtomicSwap128(N);
186 return;
190 if (tryBF16ArithToFMA(N))
191 return;
192 break;
193 default:
194 break;
195 }
196 SelectCode(N);
197}
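// Editorial note (not part of the original source): opcodes that are not
// intercepted by the switch above fall through to SelectCode(N), the
// TableGen-generated pattern matcher.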
198
199#define TCGEN05_LD_OPCODE(SHAPE, NUM) \
200 (enablePack ? NVPTX::TCGEN05_LD_##SHAPE##_##NUM##_PACK \
201 : NVPTX::TCGEN05_LD_##SHAPE##_##NUM)
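// Editorial note (not part of the original source): this is a mechanical
// token paste; e.g. TCGEN05_LD_OPCODE(16x64b, x1) expands to
//   enablePack ? NVPTX::TCGEN05_LD_16x64b_x1_PACK : NVPTX::TCGEN05_LD_16x64b_x1
// so getTcgen05LdOpcode() below only has to name the shape/num pair for each
// intrinsic ID.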
202
203static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) {
204 switch (IID) {
205 case Intrinsic::nvvm_tcgen05_ld_16x64b_x1:
207 case Intrinsic::nvvm_tcgen05_ld_16x64b_x2:
209 case Intrinsic::nvvm_tcgen05_ld_16x64b_x4:
211 case Intrinsic::nvvm_tcgen05_ld_16x64b_x8:
213 case Intrinsic::nvvm_tcgen05_ld_16x64b_x16:
215 case Intrinsic::nvvm_tcgen05_ld_16x64b_x32:
217 case Intrinsic::nvvm_tcgen05_ld_16x64b_x64:
219 case Intrinsic::nvvm_tcgen05_ld_16x64b_x128:
221 case Intrinsic::nvvm_tcgen05_ld_16x128b_x1:
223 case Intrinsic::nvvm_tcgen05_ld_16x128b_x2:
225 case Intrinsic::nvvm_tcgen05_ld_16x128b_x4:
227 case Intrinsic::nvvm_tcgen05_ld_16x128b_x8:
229 case Intrinsic::nvvm_tcgen05_ld_16x128b_x16:
231 case Intrinsic::nvvm_tcgen05_ld_16x128b_x32:
233 case Intrinsic::nvvm_tcgen05_ld_16x128b_x64:
235 case Intrinsic::nvvm_tcgen05_ld_16x256b_x1:
237 case Intrinsic::nvvm_tcgen05_ld_16x256b_x2:
239 case Intrinsic::nvvm_tcgen05_ld_16x256b_x4:
241 case Intrinsic::nvvm_tcgen05_ld_16x256b_x8:
243 case Intrinsic::nvvm_tcgen05_ld_16x256b_x16:
245 case Intrinsic::nvvm_tcgen05_ld_16x256b_x32:
247 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x1:
249 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x2:
251 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x4:
253 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x8:
255 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x16:
257 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x32:
259 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x64:
261 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x128:
263 case Intrinsic::nvvm_tcgen05_ld_32x32b_x1:
265 case Intrinsic::nvvm_tcgen05_ld_32x32b_x2:
267 case Intrinsic::nvvm_tcgen05_ld_32x32b_x4:
269 case Intrinsic::nvvm_tcgen05_ld_32x32b_x8:
271 case Intrinsic::nvvm_tcgen05_ld_32x32b_x16:
273 case Intrinsic::nvvm_tcgen05_ld_32x32b_x32:
275 case Intrinsic::nvvm_tcgen05_ld_32x32b_x64:
277 case Intrinsic::nvvm_tcgen05_ld_32x32b_x128:
279 }
281}
282
283void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {
284 if (!Subtarget->hasTcgen05InstSupport())
286 "tcgen05.ld is not supported on this architecture variant");
287
290
291 if (hasOffset) {
293 auto OffsetNode = CurDAG->getTargetConstant(
297 {N->getOperand(2), OffsetNode, N->getOperand(0)}));
298 } else {
302 {N->getOperand(2), N->getOperand(0)}));
303 }
304}
305
306bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
307 unsigned IID = N->getConstantOperandVal(1);
308 switch (IID) {
309 default:
310 return false;
311 case Intrinsic::nvvm_ldu_global_f:
312 case Intrinsic::nvvm_ldu_global_i:
313 case Intrinsic::nvvm_ldu_global_p:
314 return tryLDU(N);
315
316 case Intrinsic::nvvm_tcgen05_ld_16x64b_x1:
317 case Intrinsic::nvvm_tcgen05_ld_16x64b_x2:
318 case Intrinsic::nvvm_tcgen05_ld_16x64b_x4:
319 case Intrinsic::nvvm_tcgen05_ld_16x64b_x8:
320 case Intrinsic::nvvm_tcgen05_ld_16x64b_x16:
321 case Intrinsic::nvvm_tcgen05_ld_16x64b_x32:
322 case Intrinsic::nvvm_tcgen05_ld_16x64b_x64:
323 case Intrinsic::nvvm_tcgen05_ld_16x64b_x128:
324 case Intrinsic::nvvm_tcgen05_ld_16x128b_x1:
325 case Intrinsic::nvvm_tcgen05_ld_16x128b_x2:
326 case Intrinsic::nvvm_tcgen05_ld_16x128b_x4:
327 case Intrinsic::nvvm_tcgen05_ld_16x128b_x16:
328 case Intrinsic::nvvm_tcgen05_ld_16x128b_x32:
329 case Intrinsic::nvvm_tcgen05_ld_16x128b_x64:
330 case Intrinsic::nvvm_tcgen05_ld_16x256b_x1:
331 case Intrinsic::nvvm_tcgen05_ld_16x128b_x8:
332 case Intrinsic::nvvm_tcgen05_ld_16x256b_x2:
333 case Intrinsic::nvvm_tcgen05_ld_16x256b_x4:
334 case Intrinsic::nvvm_tcgen05_ld_16x256b_x8:
335 case Intrinsic::nvvm_tcgen05_ld_16x256b_x16:
336 case Intrinsic::nvvm_tcgen05_ld_16x256b_x32:
337 case Intrinsic::nvvm_tcgen05_ld_32x32b_x1:
338 case Intrinsic::nvvm_tcgen05_ld_32x32b_x2:
339 case Intrinsic::nvvm_tcgen05_ld_32x32b_x4:
340 case Intrinsic::nvvm_tcgen05_ld_32x32b_x8:
341 case Intrinsic::nvvm_tcgen05_ld_32x32b_x16:
342 case Intrinsic::nvvm_tcgen05_ld_32x32b_x32:
343 case Intrinsic::nvvm_tcgen05_ld_32x32b_x64:
344 case Intrinsic::nvvm_tcgen05_ld_32x32b_x128: {
345 SelectTcgen05Ld(N);
346 return true;
347 }
348
349 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x1:
350 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x2:
351 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x4:
352 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x8:
353 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x16:
354 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x32:
355 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x64:
356 case Intrinsic::nvvm_tcgen05_ld_16x32bx2_x128: {
357 SelectTcgen05Ld(N, true);
358 return true;
359 }
360 }
361}
362
363
364
367 const unsigned PTXCmpMode = [](ISD::CondCode CC) {
368 switch (CC) {
369 default:
373 return CmpMode::EQ;
376 return CmpMode::GT;
379 return CmpMode::GE;
382 return CmpMode::LT;
385 return CmpMode::LE;
388 return CmpMode::NE;
390 return CmpMode::NUM;
392 return CmpMode::NotANumber;
394 return CmpMode::EQU;
396 return CmpMode::GTU;
398 return CmpMode::GEU;
400 return CmpMode::LTU;
402 return CmpMode::LEU;
404 return CmpMode::NEU;
405 }
407 return CurDAG->getTargetConstant(PTXCmpMode, SDLoc(), MVT::i32);
408}
409
410bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
413 SDNode *SetP = CurDAG->getMachineNode(
414 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1,
415 {N->getOperand(0), N->getOperand(1), PTXCmpMode,
416 CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
418 return true;
419}
420
421bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
424 SDNode *SetP = CurDAG->getMachineNode(
425 NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1,
426 {N->getOperand(0), N->getOperand(1), PTXCmpMode,
427 CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
429 return true;
430}
431
432bool NVPTXDAGToDAGISel::tryUNPACK_VECTOR(SDNode *N) {
434 MVT EltVT = N->getSimpleValueType(0);
435
436 MachineSDNode *N2 =
437 CurDAG->getMachineNode(NVPTX::I64toV2I32, SDLoc(N), EltVT, EltVT, Vector);
438
440 return true;
441}
442
443
444
445bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
447
448 MVT VT = Vector.getSimpleValueType();
450 return false;
451
452 unsigned Opcode;
454 Opcode = NVPTX::I32toV2I16;
456 Opcode = NVPTX::I64toV2I32;
457 else
459
460
462 for (auto *U : Vector.getNode()->users()) {
464 continue;
465 if (U->getOperand(0) != Vector)
466 continue;
467 if (const ConstantSDNode *IdxConst =
469 if (IdxConst->getZExtValue() == 0)
471 else if (IdxConst->getZExtValue() == 1)
473 else
475 }
476 }
477
478
479
481 return false;
482
483
484
486 SDNode *ScatterOp =
487 CurDAG->getMachineNode(Opcode, SDLoc(N), EltVT, EltVT, Vector);
488 for (auto *Node : E0)
490 for (auto *Node : E1)
492
493 return true;
494}
495
496static std::optional<NVPTX::AddressSpace> convertAS(unsigned AS) {
497 switch (AS) {
512 default:
513 return std::nullopt;
514 }
515}
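// Editorial note (not part of the original source): convertAS() presumably
// maps the numeric NVPTX address-space IDs (global, shared, const, local,
// param, ...) onto the NVPTX::AddressSpace enum; any space it does not
// recognize hits the default case above and yields std::nullopt, which the
// callers below fall back from (typically to the generic address space).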
516
518 return convertAS(N->getMemOperand()->getAddrSpace())
520}
521
523
526 auto Ordering = N->getMergedOrdering();
527 switch (Ordering) {
541 }
543}
544
546
547
550 return Scopes[N->getSyncScopeID()];
551}
552
553namespace {
554
555struct OperationOrderings {
556 NVPTX::Ordering InstructionOrdering, FenceOrdering;
557 OperationOrderings(NVPTX::Ordering IO = NVPTX::Ordering::NotAtomic,
558 NVPTX::Ordering FO = NVPTX::Ordering::NotAtomic)
559 : InstructionOrdering(IO), FenceOrdering(FO) {}
560};
561
562static OperationOrderings
566
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
657 }
658
659
660
664 !HasMemoryOrdering) {
666 formatv("PTX does not support \"atomic\" for orderings different than "
667 "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "
668 "is: \"{}\".",
670 }
671
672
673
674
675
676
677
678
679 bool AddrGenericOrGlobalOrShared =
684 if (!AddrGenericOrGlobalOrShared)
686
687 bool UseRelaxedMMIO =
689
690 switch (Ordering) {
695
696
698 if (N->isVolatile())
701 else
704
705
707 if (!N->readMem())
709 formatv("PTX only supports Acquire Ordering on reads: {}",
710 N->getOperationName()));
713 if (!N->writeMem())
715 formatv("PTX only supports Release Ordering on writes: {}",
716 N->getOperationName()));
720 formatv("NVPTX does not support AcquireRelease Ordering on "
721 "read-modify-write "
722 "yet and PTX does not support it on loads or stores: {}",
723 N->getOperationName()));
724 }
726
727
728
729
730
731
732
734 if (N->readMem())
736 else if (N->writeMem())
738 else
740 formatv("NVPTX does not support SequentiallyConsistent Ordering on "
741 "read-modify-writes yet: {}",
742 N->getOperationName()));
743 return OperationOrderings(InstrOrder,
745 }
746 }
748 formatv("NVPTX backend does not support AtomicOrdering \"{}\" yet.",
750}
751
752}
753
756 switch (O) {
759
762
763
764
771 auto S = Scopes[N->getSyncScopeID()];
772
773
777
778
780 Subtarget->failIfClustersUnsupported("cluster scope");
781
782
784 }
786}
787
790
791
793 N.isInvariant();
794}
795
799 T->failIfClustersUnsupported(".cluster scope fence");
800
801
802 if (!T->hasSplitAcquireAndReleaseFences() &&
805
806 switch (O) {
808 switch (S) {
810 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_sys
811 : NVPTX::INT_MEMBAR_SYS;
813 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_cta
814 : NVPTX::INT_MEMBAR_CTA;
816 return NVPTX::atomic_thread_fence_acquire_cluster;
818 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu
819 : NVPTX::INT_MEMBAR_GL;
823 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
824 ScopeToString(S)));
825 }
826 break;
828 switch (S) {
830 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_sys
831 : NVPTX::INT_MEMBAR_SYS;
833 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_cta
834 : NVPTX::INT_MEMBAR_CTA;
836 return NVPTX::atomic_thread_fence_release_cluster;
838 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_gpu
839 : NVPTX::INT_MEMBAR_GL;
843 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
844 ScopeToString(S)));
845 }
846 break;
848 switch (S) {
850 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
851 : NVPTX::INT_MEMBAR_SYS;
853 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
854 : NVPTX::INT_MEMBAR_CTA;
856 return NVPTX::atomic_thread_fence_acq_rel_cluster;
858 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
859 : NVPTX::INT_MEMBAR_GL;
863 formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
864 ScopeToString(S)));
865 }
866 break;
867 }
869 switch (S) {
871 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
872 : NVPTX::INT_MEMBAR_SYS;
874 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
875 : NVPTX::INT_MEMBAR_CTA;
877 return NVPTX::atomic_thread_fence_seq_cst_cluster;
879 return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
880 : NVPTX::INT_MEMBAR_GL;
884 ScopeToString(S)));
885 }
886 break;
887 }
893 formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
894 OrderingToString(O), ScopeToString(S)));
895 }
897}
898
899
900
901
902std::pair<NVPTX::Ordering, NVPTX::Scope>
903NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
905 auto [InstructionOrdering, FenceOrdering] =
907 auto Scope = getOperationScope(N, InstructionOrdering);
908
909
912 break;
915 Chain = SDValue(CurDAG->getMachineNode(Op, DL, MVT::Other, Chain), 0);
916 break;
917 }
918 default:
920 formatv("Unexpected fence ordering: \"{}\".",
922 }
923 return {InstructionOrdering, Scope};
924}
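// Editorial note (not part of the original source): the {Ordering, Scope}
// pair returned here is what tryLoad/tryStore encode into the instruction's
// immediate operands. For a SequentiallyConsistent access the instruction
// ordering is presumably weakened to Acquire (loads) or Release (stores),
// with the rest of the guarantee supplied by the fence machine node spliced
// into the chain above.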
925
926void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
927 SDValue Src = N->getOperand(0);
932 assert(SrcAddrSpace != DstAddrSpace &&
933 "addrspacecast must be between different address spaces");
934
936
937
938 if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) {
941 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64,
942 Src, CvtNone);
944 }
945
946 unsigned Opc;
947 switch (SrcAddrSpace) {
950 Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
951 break;
953 Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
954 break;
956 if (!TM.is64Bit())
958 "Shared cluster address space is only supported in 64-bit mode");
959 Opc = NVPTX::cvta_shared_cluster_64;
960 break;
962 Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
963 break;
965 Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;
966 break;
968 Opc = TM.is64Bit() ? NVPTX::cvta_param_64 : NVPTX::cvta_param;
969 break;
970 }
972 return;
973 } else {
974
975 if (SrcAddrSpace != 0)
976 report_fatal_error("Cannot cast between two non-generic address spaces");
977 unsigned Opc;
978 switch (DstAddrSpace) {
981 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
982 break;
984 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
985 break;
987 if (!TM.is64Bit())
989 "Shared cluster address space is only supported in 64-bit mode");
990 Opc = NVPTX::cvta_to_shared_cluster_64;
991 break;
993 Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
994 break;
996 Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
997 break;
999 Opc = TM.is64Bit() ? NVPTX::cvta_to_param_64 : NVPTX::cvta_to_param;
1000 break;
1001 }
1002
1003 SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src);
1004 if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) {
1007 CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32,
1008 SDValue(CVTA, 0), CvtNone);
1009 }
1010
1012 return;
1013 }
1014}
1015
1016
1017
1018static std::optional<unsigned>
1019pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
1020 std::optional<unsigned> Opcode_i32,
1021 std::optional<unsigned> Opcode_i64) {
1022 switch (VT) {
1023 case MVT::f16:
1024 case MVT::i16:
1025 case MVT::bf16:
1026 return Opcode_i16;
1027 case MVT::v2f16:
1028 case MVT::v2bf16:
1029 case MVT::v2i16:
1030 case MVT::v4i8:
1031 case MVT::i32:
1032 case MVT::f32:
1033 return Opcode_i32;
1034 case MVT::v2f32:
1035 case MVT::v2i32:
1036 case MVT::i64:
1037 case MVT::f64:
1038 return Opcode_i64;
1039 default:
1040 return std::nullopt;
1041 }
1042}
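// Usage sketch (editorial, not part of the original source): the three
// opcode candidates are keyed by register width, so for an f32 scalar load
//   pickOpcodeForVT(MVT::f32, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64)
// returns NVPTX::LD_i32, while an unsupported type yields std::nullopt and
// the caller bails out of custom selection.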
1043
1045 return V.getOpcode() == ISD::ADD ||
1046 (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
1047}
1048
1052 return N;
1053}
1054
1055
1056
1061 GA->getValueType(0), GA->getOffset(),
1062 GA->getTargetFlags());
1065 ES->getTargetFlags());
1068
1069 return N;
1070}
1071
1074 APInt AccumulatedOffset(64u, 0);
1077 if (!CN)
1078 break;
1079
1080 const APInt CI = CN->getAPIntValue().sext(64);
1081 if (!(CI + AccumulatedOffset).isSignedIntN(32))
1082 break;
1083
1084 AccumulatedOffset += CI;
1086 }
1088 MVT::i32);
1089}
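// Worked example (editorial, not part of the original source): for an
// address of the form (base + 4) + 8 the constants are sign-extended to 64
// bits and folded into AccumulatedOffset = 12, which is materialized as an
// i32 target constant; accumulation stops once the running total would no
// longer fit a signed 32-bit immediate.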
1090
1096
1097
1098
1099
1100
1101
1102
1103
1107 return true;
1108}
1109
1110bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
1112 assert(LD->readMem() && "Expected load");
1113
1114
1116 if (PlainLoad && PlainLoad->isIndexed())
1117 return false;
1118
1119
1120 const auto CodeAddrSpace = getAddrSpace(LD);
1122 return tryLDG(LD);
1123
1124 SDLoc DL(LD);
1126 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);
1127
1128 const unsigned FromTypeWidth = LD->getMemoryVT().getSizeInBits();
1129
1130
1135
1136 uint32_t UsedBytesMask;
1137 switch (N->getOpcode()) {
1138 case ISD::LOAD:
1139 case ISD::ATOMIC_LOAD:
1140 UsedBytesMask = UINT32_MAX;
1141 break;
1143 UsedBytesMask = N->getConstantOperandVal(3);
1144 break;
1145 default:
1147 }
1148
1149 assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
1150 FromTypeWidth <= 128 && "Invalid width for load");
1151
1152
1155 getI32Imm(Scope, DL),
1156 getI32Imm(CodeAddrSpace, DL),
1157 getI32Imm(FromType, DL),
1158 getI32Imm(FromTypeWidth, DL),
1159 getI32Imm(UsedBytesMask, DL),
1162 Chain};
1163
1165 const std::optional<unsigned> Opcode =
1166 pickOpcodeForVT(TargetVT, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64);
1167 if (!Opcode)
1168 return false;
1169
1170 SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);
1171 if (!NVPTXLD)
1172 return false;
1173
1174 MachineMemOperand *MemRef = LD->getMemOperand();
1176
1178 return true;
1179}
1180
1182 switch (N->getOpcode()) {
1184 return 2;
1186 return 4;
1188 return 8;
1189 default:
1191 }
1192}
1193
1194bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
1196
1197
1198 const auto CodeAddrSpace = getAddrSpace(LD);
1200 return tryLDG(LD);
1201
1202 const MVT EltVT = LD->getSimpleValueType(0);
1203 SDLoc DL(LD);
1205 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215 const unsigned ExtensionType = N->getConstantOperandVal(4);
1218 : NVPTX::PTXLdStInstCode::Untyped;
1219
1221 const uint32_t UsedBytesMask = N->getConstantOperandVal(3);
1222
1224
1227 getI32Imm(Scope, DL),
1228 getI32Imm(CodeAddrSpace, DL),
1229 getI32Imm(FromType, DL),
1230 getI32Imm(FromTypeWidth, DL),
1231 getI32Imm(UsedBytesMask, DL),
1234 Chain};
1235
1236 std::optional<unsigned> Opcode;
1237 switch (N->getOpcode()) {
1238 default:
1242 NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2);
1243 break;
1246 NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4);
1247 break;
1250 NVPTX::LDV_i32_v8, {});
1251 break;
1252 }
1253 if (!Opcode)
1254 return false;
1255
1256 SDNode *NVPTXLD = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);
1257
1258 MachineMemOperand *MemRef = LD->getMemOperand();
1260
1262 return true;
1263}
1264
1265bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {
1266 SDLoc DL(LD);
1267
1268 unsigned ExtensionType;
1269 uint32_t UsedBytesMask;
1271 ExtensionType = Load->getExtensionType();
1272 UsedBytesMask = UINT32_MAX;
1273 } else {
1274 ExtensionType = LD->getConstantOperandVal(4);
1275 UsedBytesMask = LD->getConstantOperandVal(3);
1276 }
1279 : NVPTX::PTXLdStInstCode::Untyped;
1280
1282
1283 assert(!(LD->getSimpleValueType(0).isVector() &&
1285
1288 getI32Imm(FromTypeWidth, DL),
1289 getI32Imm(UsedBytesMask, DL),
1292 LD->getChain()};
1293
1295 std::optional<unsigned> Opcode;
1296 switch (LD->getOpcode()) {
1297 default:
1299 case ISD::LOAD:
1300 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_GLOBAL_NC_i16,
1301 NVPTX::LD_GLOBAL_NC_i32, NVPTX::LD_GLOBAL_NC_i64);
1302 break;
1304 Opcode = pickOpcodeForVT(TargetVT, std::nullopt, NVPTX::LD_GLOBAL_NC_i32,
1305 NVPTX::LD_GLOBAL_NC_i64);
1306 break;
1308 Opcode =
1310 NVPTX::LD_GLOBAL_NC_v2i32, NVPTX::LD_GLOBAL_NC_v2i64);
1311 break;
1313 Opcode =
1315 NVPTX::LD_GLOBAL_NC_v4i32, NVPTX::LD_GLOBAL_NC_v4i64);
1316 break;
1319 NVPTX::LD_GLOBAL_NC_v8i32, {});
1320 break;
1321 }
1322 if (!Opcode)
1323 return false;
1324
1325 SDNode *NVPTXLDG = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);
1326
1328 return true;
1329}
1330
1334 auto ElementBitWidth = TotalWidth / NumElts;
1335 assert(isPowerOf2_32(ElementBitWidth) && ElementBitWidth >= 8 &&
1336 ElementBitWidth <= 128 && TotalWidth <= 256 &&
1337 "Invalid width for load");
1338 return ElementBitWidth;
1339}
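// Worked example (editorial, not part of the original source): a v4i32
// vector load has TotalWidth = 128 bits and NumElts = 4, so the per-element
// width encoded into the selected load is 128 / 4 = 32 bits.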
1340
1341bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {
1343
1347
1348
1349
1352
1355
1356 std::optional<unsigned> Opcode;
1357 switch (N->getOpcode()) {
1358 default:
1361 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_i16,
1362 NVPTX::LDU_GLOBAL_i32, NVPTX::LDU_GLOBAL_i64);
1363 break;
1365 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_v2i16,
1366 NVPTX::LDU_GLOBAL_v2i32, NVPTX::LDU_GLOBAL_v2i64);
1367 break;
1369 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LDU_GLOBAL_v4i16,
1370 NVPTX::LDU_GLOBAL_v4i32, {});
1371 break;
1372 }
1373 if (!Opcode)
1374 return false;
1375
1376 SDNode *NVPTXLDU = CurDAG->getMachineNode(*Opcode, DL, LD->getVTList(), Ops);
1377
1379 return true;
1380}
1381
1382bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1384 assert(ST->writeMem() && "Expected store");
1387 assert((PlainStore || AtomicStore) && "Expected store");
1388
1389
1390 if (PlainStore && PlainStore->isIndexed())
1391 return false;
1392
1393
1394 const auto CodeAddrSpace = getAddrSpace(ST);
1395
1396 SDLoc DL(ST);
1398 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);
1399
1400
1401 const unsigned ToTypeWidth = ST->getMemoryVT().getSizeInBits();
1402
1403
1405
1406 assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
1407 "Invalid width for store");
1408
1411 getI32Imm(Ordering, DL),
1412 getI32Imm(Scope, DL),
1413 getI32Imm(CodeAddrSpace, DL),
1414 getI32Imm(ToTypeWidth, DL),
1417 Chain};
1418
1419 const std::optional<unsigned> Opcode =
1421 NVPTX::ST_i32, NVPTX::ST_i64);
1422 if (!Opcode)
1423 return false;
1424
1425 SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
1426
1427 if (!NVPTXST)
1428 return false;
1429
1430 MachineMemOperand *MemRef = ST->getMemOperand();
1433 return true;
1434}
1435
1436bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1438 const unsigned TotalWidth = ST->getMemoryVT().getSizeInBits();
1439
1440
1441 const auto CodeAddrSpace = getAddrSpace(ST);
1443 report_fatal_error("Cannot store to pointer that points to constant "
1444 "memory space");
1445 }
1446
1447 SDLoc DL(ST);
1449 const auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);
1450
1452
1454 for (auto &V : ST->ops().slice(1, NumElts))
1455 Ops.push_back(selectPossiblyImm(V));
1457 const unsigned ToTypeWidth = TotalWidth / NumElts;
1458
1459 assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
1460 TotalWidth <= 256 && "Invalid width for store");
1461
1463 Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
1464 getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
1466
1468 ST->getOperand(1).getSimpleValueType().SimpleTy;
1469 std::optional<unsigned> Opcode;
1470 switch (ST->getOpcode()) {
1471 default:
1472 return false;
1474 Opcode = pickOpcodeForVT(EltVT, NVPTX::STV_i16_v2, NVPTX::STV_i32_v2,
1475 NVPTX::STV_i64_v2);
1476 break;
1478 Opcode = pickOpcodeForVT(EltVT, NVPTX::STV_i16_v4, NVPTX::STV_i32_v4,
1479 NVPTX::STV_i64_v4);
1480 break;
1482 Opcode = pickOpcodeForVT(EltVT, {}, NVPTX::STV_i32_v8,
1483 {});
1484 break;
1485 }
1486
1487 if (!Opcode)
1488 return false;
1489
1490 SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
1491
1492 MachineMemOperand *MemRef = ST->getMemOperand();
1494
1496 return true;
1497}
1498
1499
1500
1501bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
1508 bool IsSigned = false;
1509
1510 if (N->getOpcode() == ISD::AND) {
1511
1512
1515 }
1516
1518 if (!Mask) {
1519
1520 return false;
1521 }
1522
1523
1524 uint64_t MaskVal = Mask->getZExtValue();
1526
1527
1528
1529 return false;
1530 }
1531
1532
1533 int64_t NumBits = countr_one(MaskVal);
1534 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
1535
1537
1538 Val = LHS.getNode()->getOperand(0);
1539 Start = LHS.getNode()->getOperand(1);
1541 if (StartConst) {
1542 uint64_t StartVal = StartConst->getZExtValue();
1543
1544
1545 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
1546 if (NumBits > GoodBits) {
1547
1548
1549
1550 return false;
1551 }
1552 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
1553 } else {
1554
1555
1556
1557
1558 return false;
1559 }
1560 } else {
1561
1562
1563
1564 return false;
1565 }
1566 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
1569 if (!ShiftCnst) {
1570
1571 return false;
1572 }
1573
1574 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
1575
1576 SDValue AndLHS = LHS->getOperand(0);
1577 SDValue AndRHS = LHS->getOperand(1);
1578
1579
1582 }
1583
1585 if (!MaskCnst) {
1586
1587 return false;
1588 }
1589
1590 uint64_t MaskVal = MaskCnst->getZExtValue();
1591 uint64_t NumZeros;
1592 uint64_t NumBits;
1594 NumZeros = 0;
1595
1596
1601
1602
1603
1604 NumBits = NumZeros + NumOnes - ShiftAmt;
1605 } else {
1606
1607 return false;
1608 }
1609
1610 if (ShiftAmt < NumZeros) {
1611
1612
1613 return false;
1614 }
1615
1616 Val = AndLHS;
1617 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
1618 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
1619
1620
1621
1622
1623
1626 } else if (LHS->getOpcode() == ISD::SHL) {
1627
1628
1629
1630
1631
1632
1633
1634 Val = LHS->getOperand(0);
1635
1636 SDValue ShlRHS = LHS->getOperand(1);
1638 if (!ShlCnst) {
1639
1640 return false;
1641 }
1642 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
1643
1646 if (!ShrCnst) {
1647
1648 return false;
1649 }
1650 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
1651
1652
1653 if (OuterShiftAmt < InnerShiftAmt) {
1654 return false;
1655 }
1656
1657
1658
1659
1661 return false;
1662 }
1663
1664 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
1665 MVT::i32);
1667 DL, MVT::i32);
1668
1669 if (N->getOpcode() == ISD::SRA) {
1670
1671
1672 IsSigned = true;
1673 }
1674 } else {
1675
1676 return false;
1677 }
1678 } else {
1679
1680 return false;
1681 }
1682
1683
1684 unsigned Opc;
1685
1686
1688 if (IsSigned) {
1689 Opc = NVPTX::BFE_S32rii;
1690 } else {
1691 Opc = NVPTX::BFE_U32rii;
1692 }
1693 } else if (Val.getValueType() == MVT::i64) {
1694 if (IsSigned) {
1695 Opc = NVPTX::BFE_S64rii;
1696 } else {
1697 Opc = NVPTX::BFE_U64rii;
1698 }
1699 } else {
1700
1701 return false;
1702 }
1703
1706 };
1707
1709 return true;
1710}
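// Worked example (editorial, not part of the original source): for i32 code
// shaped like (x >> 3) & 0xff, the ISD::AND path above sees a mask of eight
// trailing ones and a constant shift amount, giving Val = x, Start = 3,
// Len = 8, and the node is replaced with NVPTX::BFE_U32rii, i.e. a single
// PTX bfe.u32 bit-field extract.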
1711
1712
1713bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
1714 EVT VT = SDValue(N, 0).getValueType();
1716 return false;
1717
1718 const NVPTXSubtarget *STI = TM.getSubtargetImpl();
1720 return false;
1721
1722 const bool IsVec = VT.isVector();
1725 SDValue N0 = N->getOperand(0);
1726 SDValue N1 = N->getOperand(1);
1728 auto GetConstant = [&](float Value) -> SDValue {
1729
1731 bool LosesInfo;
1734 if (IsVec) {
1735 auto API = APF.bitcastToAPInt();
1736 API = API.concat(API);
1737 auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);
1738 return SDValue(CurDAG->getMachineNode(NVPTX::MOV_B32_i, DL, VT, Const),
1739 0);
1740 }
1741 auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);
1742 return SDValue(CurDAG->getMachineNode(NVPTX::MOV_BF16_i, DL, VT, Const), 0);
1743 };
1744
1745 switch (N->getOpcode()) {
1747
1748 Operands = {N0, GetConstant(1.0), N1};
1749 break;
1751
1752 Operands = {N1, GetConstant(-1.0), N0};
1753 break;
1755
1756
1757 Operands = {N0, N1, GetConstant(-0.0)};
1758 break;
1759 default:
1761 };
1762
1763 int Opcode = IsVec ? NVPTX::FMA_BF16x2rrr : NVPTX::FMA_BF16rrr;
1764 MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands);
1766 return true;
1767}
1768
1769SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
1770 if (V.getOpcode() == ISD::BITCAST)
1772
1774 return CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(V),
1775 V.getValueType());
1777 return CurDAG->getTargetConstantFP(CN->getValueAPF(), SDLoc(V),
1778 V.getValueType());
1779 return V;
1780}
1781
1782
1783
1786 std::vector<SDValue> &OutOps) {
1787 switch (ConstraintID) {
1788 default:
1789 return true;
1792 OutOps.push_back(Base);
1793 OutOps.push_back(Offset);
1794 return false;
1795 }
1796 }
1797 return true;
1798}
1799
1800void NVPTXDAGToDAGISel::SelectV2I64toI128(SDNode *N) {
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810 SDValue Dst = N->getOperand(1);
1813
1817
1819 NewOps[0] = N->getOperand(0);
1820 NewOps[1] = Dst;
1821 NewOps[2] = SDValue(Mov, 0);
1822 if (N->getNumOperands() == 5)
1823 NewOps[3] = N->getOperand(4);
1825
1827}
1828
1829void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839 SDValue Ch = N->getOperand(0);
1840 SDValue Src = N->getOperand(1);
1841 SDValue Glue = N->getOperand(2);
1843
1844
1845
1846 SDNode *Mov = CurDAG->getMachineNode(
1847 NVPTX::I128toV2I64, DL,
1849 {Src, Ch, Glue});
1850
1852}
1853
1854bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
1856 assert(N->getOpcode() == ISD::ATOMIC_FENCE);
1857 unsigned int FenceOp =
1859 Scopes[N->getConstantOperandVal(2)], Subtarget);
1861 SDNode *FenceNode = CurDAG->getMachineNode(FenceOp, DL, MVT::Other, Chain);
1863 return true;
1864}
1865
1873
1875 if (Scopes.empty())
1876 llvm_unreachable("NVPTX Scopes must be initialized before calling "
1877 "NVPTXScopes::operator[]");
1878
1879 auto S = Scopes.find(ID);
1880 if (S == Scopes.end()) {
1881 auto scopeName = Context->getSyncScopeName(ID);
1882 assert(scopeName.has_value() && "Scope name must exist.");
1883
1884
1886 for (const auto &Entry : Scopes) {
1887 if (auto name = Context->getSyncScopeName(Entry.first))
1888 supportedScopes.push_back(name->empty() ? "" : *name);
1889 }
1890
1892 formatv("NVPTX backend does not support syncscope \"{0}\" (ID={1}).\n"
1893 "Supported syncscopes are: {2}.",
1894 scopeName.value(), int(ID),
1896 }
1897 return S->second;
1898}
1899
1901
1902#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, is_s32, suffix) \
1903 (is_s32 \
1904 ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix \
1905 : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix)
1906
1907#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(dim, mode, is_ch, is_s32) \
1908 (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, _CH)) \
1909 : (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, )))
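// Editorial note (not part of the original source): another mechanical token
// paste; e.g. with dim=1D, mode=TILE, is_ch=true, is_s32=false this selects
// NVPTX::CP_ASYNC_BULK_TENSOR_RED_1D_TILE_CH, and the SHARED32 variant of the
// same opcode when is_s32 is true.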
1910
1911static unsigned GetCpAsyncBulkTensorS2GReductionOpcode(size_t Dim,
1912 bool IsShared32,
1913 bool IsCacheHint,
1914 bool IsIm2Col) {
1915 if (IsIm2Col) {
1916 switch (Dim) {
1917 case 3:
1919 IsShared32);
1920 case 4:
1922 IsShared32);
1923 case 5:
1925 IsShared32);
1926 default:
1928 "GetCpAsyncBulkTensorS2GReductionOpcode.");
1929 }
1930 } else {
1931 switch (Dim) {
1932 case 1:
1934 IsShared32);
1935 case 2:
1937 IsShared32);
1938 case 3:
1940 IsShared32);
1941 case 4:
1943 IsShared32);
1944 case 5:
1946 IsShared32);
1947 default:
1949 "GetCpAsyncBulkTensorS2GReductionOpcode.");
1950 }
1951 }
1952}
1953
1954void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N,
1955 unsigned RedOp,
1956 bool IsIm2Col) {
1957
1958
1959
1960
1961 size_t NumOps = N->getNumOperands();
1962 size_t NumDims = NumOps - 6;
1963 bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1;
1964 size_t NumArgs = NumDims + (IsCacheHint ? 3 : 2);
1965
1968 Ops.push_back(getI32Imm(RedOp, DL));
1969 Ops.push_back(N->getOperand(0));
1970
1971 bool IsShared32 =
1974 NumDims, IsShared32, IsCacheHint, IsIm2Col);
1976}
1977
1978#define TCGEN05_ST_OPCODE(SHAPE, NUM) \
1979 (enableUnpack ? NVPTX::TCGEN05_ST_##SHAPE##_##NUM##_UNPACK \
1980 : NVPTX::TCGEN05_ST_##SHAPE##_##NUM)
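// Editorial note (not part of the original source): the store-side twin of
// TCGEN05_LD_OPCODE above; e.g. TCGEN05_ST_OPCODE(32x32b, x2) picks between
// NVPTX::TCGEN05_ST_32x32b_x2_UNPACK and NVPTX::TCGEN05_ST_32x32b_x2 based on
// enableUnpack.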
1981
1982static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) {
1983 switch (IID) {
1984 case Intrinsic::nvvm_tcgen05_st_16x64b_x1:
1986 case Intrinsic::nvvm_tcgen05_st_16x64b_x2:
1988 case Intrinsic::nvvm_tcgen05_st_16x64b_x4:
1990 case Intrinsic::nvvm_tcgen05_st_16x64b_x8:
1992 case Intrinsic::nvvm_tcgen05_st_16x64b_x16:
1994 case Intrinsic::nvvm_tcgen05_st_16x64b_x32:
1996 case Intrinsic::nvvm_tcgen05_st_16x64b_x64:
1998 case Intrinsic::nvvm_tcgen05_st_16x64b_x128:
2000 case Intrinsic::nvvm_tcgen05_st_16x128b_x1:
2002 case Intrinsic::nvvm_tcgen05_st_16x128b_x2:
2004 case Intrinsic::nvvm_tcgen05_st_16x128b_x4:
2006 case Intrinsic::nvvm_tcgen05_st_16x128b_x8:
2008 case Intrinsic::nvvm_tcgen05_st_16x128b_x16:
2010 case Intrinsic::nvvm_tcgen05_st_16x128b_x32:
2012 case Intrinsic::nvvm_tcgen05_st_16x128b_x64:
2014 case Intrinsic::nvvm_tcgen05_st_16x256b_x1:
2016 case Intrinsic::nvvm_tcgen05_st_16x256b_x2:
2018 case Intrinsic::nvvm_tcgen05_st_16x256b_x4:
2020 case Intrinsic::nvvm_tcgen05_st_16x256b_x8:
2022 case Intrinsic::nvvm_tcgen05_st_16x256b_x16:
2024 case Intrinsic::nvvm_tcgen05_st_16x256b_x32:
2026 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x1:
2028 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x2:
2030 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x4:
2032 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x8:
2034 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x16:
2036 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x32:
2038 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x64:
2040 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x128:
2042 case Intrinsic::nvvm_tcgen05_st_32x32b_x1:
2044 case Intrinsic::nvvm_tcgen05_st_32x32b_x2:
2046 case Intrinsic::nvvm_tcgen05_st_32x32b_x4:
2048 case Intrinsic::nvvm_tcgen05_st_32x32b_x8:
2050 case Intrinsic::nvvm_tcgen05_st_32x32b_x16:
2052 case Intrinsic::nvvm_tcgen05_st_32x32b_x32:
2054 case Intrinsic::nvvm_tcgen05_st_32x32b_x64:
2056 case Intrinsic::nvvm_tcgen05_st_32x32b_x128:
2058 }
2060}
2061
2062void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) {
2063 if (!Subtarget->hasTcgen05InstSupport())
2065 "tcgen05.st is not supported on this architecture variant");
2066
2069
2071 N->getOperand(2)
2072 };
2073
2074 if (hasOffset)
2077 MVT::i32));
2078
2079 for (unsigned I = hasOffset ? 4 : 3; I < (N->getNumOperands() - 1); I++)
2081
2082 bool enableUnpack =
2084 ->getZExtValue();
2085
2086 Operands.push_back(N->getOperand(0));
2088 DL, N->getVTList(), Operands));
2089}
2090
2091bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) {
2092 unsigned IID = N->getConstantOperandVal(1);
2094 auto CastTy = [](TMARedTy Op) { return static_cast<unsigned>(Op); };
2095 switch (IID) {
2096 default:
2097 return false;
2098 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d:
2099 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d:
2100 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d:
2101 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_4d:
2102 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_5d:
2103 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD));
2104 return true;
2105 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_3d:
2106 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_4d:
2107 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_im2col_5d:
2108 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::ADD),
2109 true);
2110 return true;
2111 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_1d:
2112 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_2d:
2113 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_3d:
2114 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_4d:
2115 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_tile_5d:
2116 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN));
2117 return true;
2118 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_3d:
2119 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_4d:
2120 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_min_im2col_5d:
2121 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MIN),
2122 true);
2123 return true;
2124 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_1d:
2125 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_2d:
2126 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_3d:
2127 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_4d:
2128 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_tile_5d:
2129 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX));
2130 return true;
2131 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_3d:
2132 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_4d:
2133 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_max_im2col_5d:
2134 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::MAX),
2135 true);
2136 return true;
2137 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_1d:
2138 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_2d:
2139 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_3d:
2140 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_4d:
2141 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_tile_5d:
2142 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC));
2143 return true;
2144 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_3d:
2145 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_4d:
2146 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_inc_im2col_5d:
2147 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::INC),
2148 true);
2149 return true;
2150 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_1d:
2151 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_2d:
2152 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_3d:
2153 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_4d:
2154 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_tile_5d:
2155 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC));
2156 return true;
2157 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_3d:
2158 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_4d:
2159 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_dec_im2col_5d:
2160 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::DEC),
2161 true);
2162 return true;
2163 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_1d:
2164 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_2d:
2165 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_3d:
2166 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_4d:
2167 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_tile_5d:
2168 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND));
2169 return true;
2170 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_3d:
2171 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_4d:
2172 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_and_im2col_5d:
2173 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::AND),
2174 true);
2175 return true;
2176 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_1d:
2177 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_2d:
2178 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_3d:
2179 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_4d:
2180 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_tile_5d:
2181 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR));
2182 return true;
2183 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_3d:
2184 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_4d:
2185 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_or_im2col_5d:
2186 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::OR),
2187 true);
2188 return true;
2189 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_1d:
2190 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_2d:
2191 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_3d:
2192 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_4d:
2193 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_tile_5d:
2194 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR));
2195 return true;
2196 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_3d:
2197 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_4d:
2198 case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_xor_im2col_5d:
2199 SelectCpAsyncBulkTensorReduceCommon(N, CastTy(TMARedTy::XOR),
2200 true);
2201 return true;
2202
2203 case Intrinsic::nvvm_tcgen05_st_16x64b_x1:
2204 case Intrinsic::nvvm_tcgen05_st_16x64b_x2:
2205 case Intrinsic::nvvm_tcgen05_st_16x64b_x4:
2206 case Intrinsic::nvvm_tcgen05_st_16x64b_x8:
2207 case Intrinsic::nvvm_tcgen05_st_16x64b_x16:
2208 case Intrinsic::nvvm_tcgen05_st_16x64b_x32:
2209 case Intrinsic::nvvm_tcgen05_st_16x64b_x64:
2210 case Intrinsic::nvvm_tcgen05_st_16x64b_x128:
2211 case Intrinsic::nvvm_tcgen05_st_32x32b_x1:
2212 case Intrinsic::nvvm_tcgen05_st_32x32b_x2:
2213 case Intrinsic::nvvm_tcgen05_st_32x32b_x4:
2214 case Intrinsic::nvvm_tcgen05_st_32x32b_x8:
2215 case Intrinsic::nvvm_tcgen05_st_32x32b_x16:
2216 case Intrinsic::nvvm_tcgen05_st_32x32b_x32:
2217 case Intrinsic::nvvm_tcgen05_st_32x32b_x64:
2218 case Intrinsic::nvvm_tcgen05_st_32x32b_x128:
2219 case Intrinsic::nvvm_tcgen05_st_16x128b_x1:
2220 case Intrinsic::nvvm_tcgen05_st_16x128b_x2:
2221 case Intrinsic::nvvm_tcgen05_st_16x128b_x4:
2222 case Intrinsic::nvvm_tcgen05_st_16x128b_x8:
2223 case Intrinsic::nvvm_tcgen05_st_16x128b_x16:
2224 case Intrinsic::nvvm_tcgen05_st_16x128b_x32:
2225 case Intrinsic::nvvm_tcgen05_st_16x128b_x64:
2226 case Intrinsic::nvvm_tcgen05_st_16x256b_x1:
2227 case Intrinsic::nvvm_tcgen05_st_16x256b_x2:
2228 case Intrinsic::nvvm_tcgen05_st_16x256b_x4:
2229 case Intrinsic::nvvm_tcgen05_st_16x256b_x8:
2230 case Intrinsic::nvvm_tcgen05_st_16x256b_x16:
2231 case Intrinsic::nvvm_tcgen05_st_16x256b_x32: {
2232 SelectTcgen05St(N);
2233 return true;
2234 }
2235
2236 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x1:
2237 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x2:
2238 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x4:
2239 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x8:
2240 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x16:
2241 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x32:
2242 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x64:
2243 case Intrinsic::nvvm_tcgen05_st_16x32bx2_x128: {
2244 SelectTcgen05St(N, true);
2245 return true;
2246 }
2247 }
2248}
2249
2250void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
2252 SDLoc dl(N);
2253
2257 Ops.append(N->op_begin() + 2, N->op_end());
2258 Ops.append({
2259 getI32Imm(getMemOrder(AN), dl),
2260 getI32Imm(getAtomicScope(AN), dl),
2262 Chain,
2263 });
2264
2268 ? NVPTX::ATOM_EXCH_B128
2269 : NVPTX::ATOM_CAS_B128;
2270
2271 auto *ATOM = CurDAG->getMachineNode(Opcode, dl, N->getVTList(), Ops);
2273
2275}