LLVM: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Source File
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
35
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));
41
namespace {

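// Generic helpers for building, inserting and extracting bit fields; the
// waitcnt and depctr encode/decode routines later in this file are built on
// these.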
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
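// For example, packBits(/*Src=*/3, /*Dst=*/0, /*Shift=*/4, /*Width=*/2)
// yields 0x30, and unpackBits(0x30, 4, 2) recovers 3.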
63
64
65unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
67}
68
69
70unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
72}
73
74
75unsigned getExpcntBitShift(unsigned VersionMajor) {
77}
78
79
80unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
81
82
83unsigned getLgkmcntBitShift(unsigned VersionMajor) {
85}
86
87
88unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
90}
91
92
93unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
94
95
96unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
97 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
98}
99
100
101unsigned getLoadcntBitWidth(unsigned VersionMajor) {
103}
104
105
106unsigned getSamplecntBitWidth(unsigned VersionMajor) {
108}
109
110
111unsigned getBvhcntBitWidth(unsigned VersionMajor) {
113}
114
115
116unsigned getDscntBitWidth(unsigned VersionMajor) {
118}
119
120
121unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
122
123
124unsigned getStorecntBitWidth(unsigned VersionMajor) {
126}
127
128
129unsigned getKmcntBitWidth(unsigned VersionMajor) {
131}
132
133
134unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
136}
137
138
139unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
141}
142
143
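// Bit positions and widths of the fields of the s_waitcnt_depctr immediate;
// the encode/decode helpers in the DepCtr namespace below are expressed in
// terms of these.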
inline unsigned getVaSdstBitWidth() { return 3; }

inline unsigned getVaSdstBitShift() { return 9; }

inline unsigned getVmVsrcBitWidth() { return 3; }

inline unsigned getVmVsrcBitShift() { return 2; }

inline unsigned getVaVdstBitWidth() { return 4; }

inline unsigned getVaVdstBitShift() { return 12; }

inline unsigned getVaVccBitWidth() { return 1; }

inline unsigned getVaVccBitShift() { return 1; }

inline unsigned getSaSdstBitWidth() { return 1; }

inline unsigned getSaSdstBitShift() { return 0; }

inline unsigned getVaSsrcBitWidth() { return 1; }

inline unsigned getVaSsrcBitShift() { return 8; }

inline unsigned getHoldCntWidth() { return 1; }

inline unsigned getHoldCntBitShift() { return 7; }

}
186
namespace llvm {
188
190
191
192
196
197
201
204 M.getModuleFlag("amdhsa_code_object_version"))) {
205 return (unsigned)Ver->getZExtValue() / 100;
206 }
207
209}
210
214
216 switch (ABIVersion) {
218 return 4;
220 return 5;
222 return 6;
223 default:
225 }
226}
227
230 return 0;
231
232 switch (CodeObjectVersion) {
233 case 4:
235 case 5:
237 case 6:
239 default:
241 Twine(CodeObjectVersion));
242 }
243}
244
246 switch (CodeObjectVersion) {
248 return 48;
251 default:
253 }
254}
255
256
257
259 switch (CodeObjectVersion) {
261 return 24;
264 default:
266 }
267}
268
270 switch (CodeObjectVersion) {
272 return 32;
275 default:
277 }
278}
279
281 switch (CodeObjectVersion) {
283 return 40;
286 default:
288 }
289}
290
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#define GET_WMMAInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
302
304 unsigned VDataDwords, unsigned VAddrDwords) {
306 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
307 return Info ? Info->Opcode : -1;
308}
309
314
320 return NewInfo ? NewInfo->Opcode : -1;
321}
322
325 bool IsG16Supported) {
326 unsigned AddrWords = BaseOpcode->NumExtraArgs;
329 if (IsA16)
330 AddrWords += divideCeil(AddrComponents, 2);
331 else
332 AddrWords += AddrComponents;
333
334
335
336
337
338
340 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
341
342
343
345 else
347 }
348 return AddrWords;
349}
350
361
370
375
380
384
388
392
399
407
412
#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL
415
420
426
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"
464
467 return Info ? Info->BaseOpcode : -1;
468}
469
472 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
473 return Info ? Info->Opcode : -1;
474}
475
478 return Info ? Info->elements : 0;
479}
480
483 return Info && Info->has_vaddr;
484}
485
488 return Info && Info->has_srsrc;
489}
490
493 return Info && Info->has_soffset;
494}
495
498 return Info ? Info->BaseOpcode : -1;
499}
500
503 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
504 return Info ? Info->Opcode : -1;
505}
506
509 return Info ? Info->elements : 0;
510}
511
514 return Info && Info->has_vaddr;
515}
516
519 return Info && Info->has_srsrc;
520}
521
524 return Info && Info->has_soffset;
525}
526
529 return Info && Info->IsBufferInv;
530}
531
536
541
  return !Info || Info->IsSingle;
}

  return !Info || Info->IsSingle;
}

  return !Info || Info->IsSingle;
}
556
558 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
559}
560
562
567
570 return Info && Info->is_gfx940_xdl;
571}
572
575 return Info ? Info->is_wmma_xdl : false;
576}
577
579 switch (EncodingVal) {
582 return 6;
584 return 4;
587 default:
588 return 8;
589 }
590
592}
593
595 unsigned BLGP,
596 unsigned F8F8Opcode) {
599 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
600}
601
603 switch (Fmt) {
606 return 16;
609 return 12;
611 return 8;
612 }
613
615}
616
618 unsigned FmtB,
619 unsigned F8F8Opcode) {
622 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
623}
624
626 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
628 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
630 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
632 llvm_unreachable("Subtarget generation does not support VOPD!");
633}
634
636 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
637 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
640
641
642
643
646 EncodingFamily, VOPD3) != -1;
647 return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
648 }
649
650 return {false, false};
651}
652
654 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
655 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
657 return Info ? Info->VOPDOp : ~0u;
658}
659
663
665 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
666 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
667 Opc == AMDGPU::V_MAC_F32_e64_vi ||
668 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
669 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
670 Opc == AMDGPU::V_MAC_F16_e64_vi ||
671 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
672 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
673 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
674 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
675 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
676 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
677 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
678 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
679 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
680 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
681 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
682 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
683 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
684 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
685 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
686 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
687 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
688 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
689}
690
692 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
693 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
694 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
695 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
696 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
697 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
698 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
699 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
700}
701
703 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
704 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
705 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
706 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
707 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
708 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
709 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
710 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
711 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
712 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
713}
714
716 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
717 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
718 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
719 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
720 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
721 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
722 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
723 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
724 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
725 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
726 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
727 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
728 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
729 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
730 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
731 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
732 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
733 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
734 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
735}
736
738 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
739 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
740 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
741 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
742 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
743 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
744 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
745 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
746}
747
749 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
750 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
751}
752
767
772
777 if (Info->HasFP8DstByteSel)
779 if (Info->HasFP4DstByteSel)
781
783}
784
787 return Info ? Info->Opcode3Addr : ~0u;
788}
789
792 return Info ? Info->Opcode2Addr : ~0u;
793}
794
795
796
797
799 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
800}
801
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
816
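// Map a pair of component opcodes (OpX, OpY) to the fused VOPD/VOPD3 opcode
// for the given encoding family, or return -1 if the pair cannot be fused.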
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}
825
827 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
829 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
830 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
832 return {OpX->BaseVOP, OpY->BaseVOP};
833}
834
836
839
844 HasSrc2Acc = TiedIdx != -1;
846
851 : 1;
853
854 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
855 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
856
857
858 NumVOPD3Mods = 2;
859 if (IsVOP3)
860 SrcOperandsNum = 3;
862 getNamedOperandIdx(Opcode, OpName::src0))) {
863
864
865 NumVOPD3Mods = SrcOperandsNum;
866 if (HasSrc2Acc)
867 --NumVOPD3Mods;
868 }
869
871 return;
872
874 unsigned CompOprIdx;
875 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
877 MandatoryLiteralIdx = CompOprIdx;
878 break;
879 }
880 }
881}
882
884 return getNamedOperandIdx(Opcode, OpName::bitop3);
885}
886
889
892
896
897
898 return 0;
899}
900
902 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
904 bool VOPD3) const {
905
910
912 unsigned BanksMask) -> bool {
915 if (!BaseX)
916 BaseX = X;
917 if (!BaseY)
918 BaseY = Y;
919 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
920 return true;
921 if (BaseX != X &&
922 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
923 return true;
924 if (BaseY != Y &&
925 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
926 return true;
927
928
929
930 return false;
931 };
932
933 unsigned CompOprIdx;
937 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
938 continue;
939
942 return CompOprIdx;
943
945 continue;
946
948
949
950 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
951 return CompOprIdx;
952 if (VOPD3)
953 continue;
954 }
955
956 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
958 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
959 return CompOprIdx;
960 }
961
962 return {};
963}
964
965
966
967
968
969
970
971
973InstInfo::getRegIndices(unsigned CompIdx,
974 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
975 bool VOPD3) const {
977
978 const auto &Comp = CompInfo[CompIdx];
980
981 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
982
983 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
984 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
986 Comp.hasRegSrcOperand(CompSrcIdx)
987 ? GetRegIdx(CompIdx,
988 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
990 }
992}
993
994}
995
999
1003 const auto &OpXDesc = InstrInfo->get(OpX);
1004 const auto &OpYDesc = InstrInfo->get(OpY);
1009}
1010
1011namespace IsaInfo {
1012
1016 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1018 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1020}
1021
1023
1024
1025
1027 std::optional XnackRequested;
1028 std::optional SramEccRequested;
1029
1030 for (const std::string &Feature : Features.getFeatures()) {
1031 if (Feature == "+xnack")
1032 XnackRequested = true;
1033 else if (Feature == "-xnack")
1034 XnackRequested = false;
1035 else if (Feature == "+sramecc")
1036 SramEccRequested = true;
1037 else if (Feature == "-sramecc")
1038 SramEccRequested = false;
1039 }
1040
1043
1044 if (XnackRequested) {
1045 if (XnackSupported) {
1046 XnackSetting =
1048 } else {
1049
1050
1051 if (*XnackRequested) {
1052 errs() << "warning: xnack 'On' was requested for a processor that does "
1053 "not support it!\n";
1054 } else {
1055 errs() << "warning: xnack 'Off' was requested for a processor that "
1056 "does not support it!\n";
1057 }
1058 }
1059 }
1060
1061 if (SramEccRequested) {
1062 if (SramEccSupported) {
1063 SramEccSetting =
1065 } else {
1066
1067
1068
1069 if (*SramEccRequested) {
1070 errs() << "warning: sramecc 'On' was requested for a processor that "
1071 "does not support it!\n";
1072 } else {
1073 errs() << "warning: sramecc 'Off' was requested for a processor that "
1074 "does not support it!\n";
1075 }
1076 }
1077 }
1078}
1079
1089
1092 TargetID.split(TargetIDSplit, ':');
1093
1094 for (const auto &FeatureString : TargetIDSplit) {
1095 if (FeatureString.starts_with("xnack"))
1097 if (FeatureString.starts_with("sramecc"))
1099 }
1100}
1101
1103 std::string StringRep;
1105
1106 auto TargetTriple = STI.getTargetTriple();
1108
1109 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1110 << '-' << TargetTriple.getOSName() << '-'
1111 << TargetTriple.getEnvironmentName() << '-';
1112
1113 std::string Processor;
1114
1115
1116
1118 Processor = STI.getCPU().str();
1119 else
1122 .str();
1123
1124 std::string Features;
1125 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
1126
1128 Features += ":sramecc-";
1130 Features += ":sramecc+";
1131
1133 Features += ":xnack-";
1135 Features += ":xnack+";
1136 }
1137
1138 StreamRep << Processor << Features;
1139
1140 return StringRep;
1141}
1142
1145 return 16;
1147 return 32;
1148
1149 return 64;
1150}
1151
1154
1155
1156
1157
1159 BytesPerCU *= 2;
1160
1161 return BytesPerCU;
1162}
1163
1165 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1166 return 32768;
1167 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1168 return 65536;
1169 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1170 return 163840;
1171 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1172 return 327680;
1173 return 32768;
1174}
1175
1177
1178
1179
1180
1183 return 4;
1184 }
1185
1186
1187
1189 return 2;
1190
1191
1192
1193 return 4;
1194}
1195
1197 unsigned FlatWorkGroupSize) {
1198 assert(FlatWorkGroupSize != 0);
1200 return 8;
1203 if (N == 1) {
1204
1205 return MaxWaves;
1206 }
1207
1208 unsigned MaxBarriers = 16;
1210 MaxBarriers = 32;
1211
1212 return std::min(MaxWaves / N, MaxBarriers);
1213}
1214
1216
1218
1220 return 8;
1222 return 10;
1224}
1225
1227 unsigned FlatWorkGroupSize) {
1230}
1231
1233
1235
1236 return 1024;
1237}
1238
1240 unsigned FlatWorkGroupSize) {
1242}
1243
1246 if (Version.Major >= 10)
1249 return 16;
1250 return 8;
1251}
1252
1254
1258 return 800;
1259 return 512;
1260}
1261
1265
1267 if (Version.Major >= 10)
1268 return 106;
1270 return 102;
1271 return 104;
1272}
1273
1275 assert(WavesPerEU != 0);
1276
1278 if (Version.Major >= 10)
1279 return 0;
1280
1282 return 0;
1283
1284 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1286 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1289}
1290
1292 bool Addressable) {
1293 assert(WavesPerEU != 0);
1294
1297 if (Version.Major >= 10)
1298 return Addressable ? AddressableNumSGPRs : 108;
1299 if (Version.Major >= 8 && !Addressable)
1300 AddressableNumSGPRs = 112;
1303 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1305 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1306}
1307
1309 bool FlatScrUsed, bool XNACKUsed) {
1310 unsigned ExtraSGPRs = 0;
1311 if (VCCUsed)
1312 ExtraSGPRs = 2;
1313
1315 if (Version.Major >= 10)
1316 return ExtraSGPRs;
1317
1318 if (Version.Major < 8) {
1319 if (FlatScrUsed)
1320 ExtraSGPRs = 4;
1321 } else {
1322 if (XNACKUsed)
1323 ExtraSGPRs = 4;
1324
1325 if (FlatScrUsed ||
1327 ExtraSGPRs = 6;
1328 }
1329
1330 return ExtraSGPRs;
1331}
1332
1334 bool FlatScrUsed) {
1337}
1338
1340 unsigned Granule) {
1341 return divideCeil(std::max(1u, NumRegs), Granule);
1342}
1343
1349
1351 unsigned DynamicVGPRBlockSize,
1352 std::optional EnableWavefrontSize32) {
1354 return 8;
1355
1356 if (DynamicVGPRBlockSize != 0)
1357 return DynamicVGPRBlockSize;
1358
1359 bool IsWave32 = EnableWavefrontSize32
1360 ? *EnableWavefrontSize32
1362
1364 return IsWave32 ? 24 : 12;
1365
1367 return IsWave32 ? 16 : 8;
1368
1369 return IsWave32 ? 8 : 4;
1370}
1371
1373 std::optional EnableWavefrontSize32) {
1375 return 8;
1376
1377 bool IsWave32 = EnableWavefrontSize32
1378 ? *EnableWavefrontSize32
1380
1382 return IsWave32 ? 16 : 8;
1383
1384 return IsWave32 ? 8 : 4;
1385}
1386
1388
1391 return 512;
1393 return 256;
1396 return IsWave32 ? 1536 : 768;
1397 return IsWave32 ? 1024 : 512;
1398}
1399
1402 if (Features.test(Feature1024AddressableVGPRs))
1403 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1404 return 256;
1405}
1406
1408 unsigned DynamicVGPRBlockSize) {
1410 if (Features.test(FeatureGFX90AInsts))
1411 return 512;
1412
1413 if (DynamicVGPRBlockSize != 0)
1414
1417}
1418
1420 unsigned NumVGPRs,
1421 unsigned DynamicVGPRBlockSize) {
1425}
1426
1428 unsigned MaxWaves,
1429 unsigned TotalNumVGPRs) {
1430 if (NumVGPRs < Granule)
1431 return MaxWaves;
1432 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1433 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1434}
1435
1439 return MaxWaves;
1440
1442 if (SGPRs <= 80)
1443 return 10;
1444 if (SGPRs <= 88)
1445 return 9;
1446 if (SGPRs <= 100)
1447 return 8;
1448 return 7;
1449 }
1450 if (SGPRs <= 48)
1451 return 10;
1452 if (SGPRs <= 56)
1453 return 9;
1454 if (SGPRs <= 64)
1455 return 8;
1456 if (SGPRs <= 72)
1457 return 7;
1458 if (SGPRs <= 80)
1459 return 6;
1460 return 5;
1461}
1462
1464 unsigned DynamicVGPRBlockSize) {
1465 assert(WavesPerEU != 0);
1466
1468 if (WavesPerEU >= MaxWavesPerEU)
1469 return 0;
1470
1472 unsigned AddrsableNumVGPRs =
1475 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1476
1477 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1478 return 0;
1479
1481 DynamicVGPRBlockSize);
1482 if (WavesPerEU < MinWavesPerEU)
1483 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1484
1485 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1486 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1487 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1488}
1489
1491 unsigned DynamicVGPRBlockSize) {
1492 assert(WavesPerEU != 0);
1493
1494 unsigned MaxNumVGPRs =
1497 unsigned AddressableNumVGPRs =
1499 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1500}
1501
1503 std::optional EnableWavefrontSize32) {
1506 1;
1507}
1508
1510 unsigned NumVGPRs,
1511 unsigned DynamicVGPRBlockSize,
1512 std::optional EnableWavefrontSize32) {
1514 NumVGPRs,
1516}
1517}
1518
1532 } else {
1534 }
1535
1536
1537
1539
1540
1541
1545
1546 if (Version.Major >= 10) {
1550 }
1551}
1552
1556
1560
1566
1570
1572 return C == 'v' || C == 's' || C == 'a';
1573}
1574
1576 char Kind = RegName.front();
1578 return {};
1579
1581 if (RegName.consume_front("[")) {
1582 unsigned Idx, End;
1588 unsigned NumRegs = End - Idx + 1;
1589 if (NumRegs > 1)
1590 return {Kind, Idx, NumRegs};
1591 }
1592 } else {
1593 unsigned Idx;
1596 return {Kind, Idx, 1};
1597 }
1598
1599 return {};
1600}
1601
1602std::tuple<char, unsigned, unsigned>
1605 if (.consume_front("{") ||
.consume_back("}"))
1606 return {};
1608}
1609
1610std::pair<unsigned, unsigned>
1612 std::pair<unsigned, unsigned> Default,
1613 bool OnlyFirstRequired) {
1615 return {Attr->first, Attr->second.value_or(Default.second)};
1617}
1618
std::optional<std::pair<unsigned, std::optional<unsigned>>>
                            bool OnlyFirstRequired) {
  if (!A.isStringAttribute())
    return std::nullopt;

  std::pair<unsigned, std::optional<unsigned>> Ints;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1629 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1630 Ctx.emitError("can't parse first integer attribute " + Name);
1631 return std::nullopt;
1632 }
1633 unsigned Second = 0;
1634 if (Strs.second.trim().getAsInteger(0, Second)) {
1635 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1636 Ctx.emitError("can't parse second integer attribute " + Name);
1637 return std::nullopt;
1638 }
1639 } else {
1640 Ints.second = Second;
1641 }
1642
1643 return Ints;
1644}
1645
1647 unsigned Size,
1649 std::optional<SmallVector> R =
1652}
1653
1654std::optional<SmallVector>
1658
1660 if (.isValid())
1661 return std::nullopt;
1662 if (.isStringAttribute()) {
1663 Ctx.emitError(Name + " is not a string attribute");
1664 return std::nullopt;
1665 }
1666
1668
1670 unsigned i = 0;
1671 for (; !S.empty() && i < Size; i++) {
1672 std::pair<StringRef, StringRef> Strs = S.split(',');
1673 unsigned IntVal;
1674 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1675 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1676 Name);
1677 return std::nullopt;
1678 }
1679 Vals[i] = IntVal;
1680 S = Strs.second;
1681 }
1682
1684 Ctx.emitError("attribute " + Name +
1685 " has incorrect number of integers; expected " +
1687 return std::nullopt;
1688 }
1689 return Vals;
1690}
1691
1695 auto Low =
1699
1700
1701
1702
1704 if (Low.ule(Val) && High.ugt(Val))
1705 return true;
1706 } else {
1707 if (Low.uge(Val) && High.ult(Val))
1708 return true;
1709 }
1710 }
1711
1712 return false;
1713}
1714
1717 if (Wait.LoadCnt != ~0u)
1718 OS << LS << "LoadCnt: " << Wait.LoadCnt;
1719 if (Wait.ExpCnt != ~0u)
1720 OS << LS << "ExpCnt: " << Wait.ExpCnt;
1721 if (Wait.DsCnt != ~0u)
1722 OS << LS << "DsCnt: " << Wait.DsCnt;
1723 if (Wait.StoreCnt != ~0u)
1724 OS << LS << "StoreCnt: " << Wait.StoreCnt;
1725 if (Wait.SampleCnt != ~0u)
1726 OS << LS << "SampleCnt: " << Wait.SampleCnt;
1727 if (Wait.BvhCnt != ~0u)
1728 OS << LS << "BvhCnt: " << Wait.BvhCnt;
1729 if (Wait.KmCnt != ~0u)
1730 OS << LS << "KmCnt: " << Wait.KmCnt;
1731 if (Wait.XCnt != ~0u)
1732 OS << LS << "XCnt: " << Wait.XCnt;
1733 if (LS.unused())
1734 OS << "none";
1735 OS << '\n';
1736 return OS;
1737}
1738
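// Maximum encodable value of each wait counter, derived from the bit width of
// the corresponding field for the given ISA version.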
1740 return (1 << (getVmcntBitWidthLo(Version.Major) +
1741 getVmcntBitWidthHi(Version.Major))) -
1742 1;
1743}
1744
1746 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1747}
1748
1750 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1751}
1752
1754 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1755}
1756
1758 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1759}
1760
1762 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1763}
1764
1766 return (1 << getDscntBitWidth(Version.Major)) - 1;
1767}
1768
1770 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1771}
1772
1774 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1775}
1776
1778 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1779}
1780
1782 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1783 getVmcntBitWidthLo(Version.Major));
1784 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1785 getExpcntBitWidth(Version.Major));
1786 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1787 getLgkmcntBitWidth(Version.Major));
1788 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1789 getVmcntBitWidthHi(Version.Major));
1790 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1791}
1792
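// On targets where vmcnt is split into low and high fields (GFX9/GFX10), the
// two parts are extracted separately and rejoined into a single counter value.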
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}
1800
1802 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1803 getExpcntBitWidth(Version.Major));
1804}
1805
1807 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1808 getLgkmcntBitWidth(Version.Major));
1809}
1810
1812 unsigned &Expcnt, unsigned &Lgkmcnt) {
1816}
1817
1823 return Decoded;
1824}
1825
1827 unsigned Vmcnt) {
1829 getVmcntBitWidthLo(Version.Major));
1830 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1831 getVmcntBitShiftHi(Version.Major),
1832 getVmcntBitWidthHi(Version.Major));
1833}
1834
1836 unsigned Expcnt) {
1837 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1838 getExpcntBitWidth(Version.Major));
1839}
1840
1842 unsigned Lgkmcnt) {
1843 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1844 getLgkmcntBitWidth(Version.Major));
1845}
1846
1848 unsigned Expcnt, unsigned Lgkmcnt) {
1854}
1855
1859
1861 bool IsStore) {
1862 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1863 getDscntBitWidth(Version.Major));
1864 if (IsStore) {
1865 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1866 getStorecntBitWidth(Version.Major));
1867 return Dscnt | Storecnt;
1868 }
1869 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1870 getLoadcntBitWidth(Version.Major));
1871 return Dscnt | Loadcnt;
1872}
1873
1877 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1878 getLoadcntBitWidth(Version.Major));
1879 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1880 getDscntBitWidth(Version.Major));
1881 return Decoded;
1882}
1883
1887 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1888 getStorecntBitWidth(Version.Major));
1889 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1890 getDscntBitWidth(Version.Major));
1891 return Decoded;
1892}
1893
1895 unsigned Loadcnt) {
1896 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1897 getLoadcntBitWidth(Version.Major));
1898}
1899
1901 unsigned Storecnt) {
1902 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1903 getStorecntBitWidth(Version.Major));
1904}
1905
1907 unsigned Dscnt) {
1908 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1909 getDscntBitWidth(Version.Major));
1910}
1911
1913 unsigned Dscnt) {
1918}
1919
1923
1925 unsigned Storecnt, unsigned Dscnt) {
1930}
1931
1933 const Waitcnt &Decoded) {
1935}
1936
1937
1938
1939
1940
1944 unsigned Enc = 0;
1945 for (int Idx = 0; Idx < Size; ++Idx) {
1946 const auto &Op = Opr[Idx];
1947 if (Op.isSupported(STI))
1948 Enc |= Op.encode(Op.Default);
1949 }
1950 return Enc;
1951}
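// Decode a packed custom-operand value: every field supported on this
// subtarget must hold a valid value, no bits outside the supported fields may
// be set, and HasNonDefaultVal reports whether any field differs from its
// default.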
1952
1954 int Size, unsigned Code,
1955 bool &HasNonDefaultVal,
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}
1971
1973 unsigned Code, int &Idx, StringRef &Name,
1974 unsigned &Val, bool &IsDefault,
1976 while (Idx < Size) {
1977 const auto &Op = Opr[Idx++];
1978 if (Op.isSupported(STI)) {
1979 Name = Op.Name;
1980 Val = Op.decode(Code);
1981 IsDefault = (Val == Op.Default);
1982 return true;
1983 }
1984 }
1985
1986 return false;
1987}
1988
1990 int64_t InputVal) {
1991 if (InputVal < 0 || InputVal > Op.Max)
1993 return Op.encode(InputVal);
1994}
1995
1997 const StringRef Name, int64_t InputVal,
1998 unsigned &UsedOprMask,
2001 for (int Idx = 0; Idx < Size; ++Idx) {
2002 const auto &Op = Opr[Idx];
2003 if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
2006 continue;
2007 }
2008 auto OprMask = Op.getMask();
2009 if (OprMask & UsedOprMask)
2011 UsedOprMask |= OprMask;
2013 }
2014 }
2015 return InvalidId;
2016}
2017
2018
2019
2020
2021
2022namespace DepCtr {
2023
2025 static int Default = -1;
2029}
2030
2034 HasNonDefaultVal, STI);
2035}
2036
2040 IsDefault, STI);
2041}
2042
2048
2050 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2051}
2052
2054 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2055}
2056
2058 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2059}
2060
2062 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2063}
2064
2066 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2067}
2068
2070 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2071}
2072
2074 return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
2075}
2076
2078 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2079}
2080
2085
2087 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2088}
2089
2094
2096 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2097}
2098
2103
2105 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2106}
2107
2112
2114 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2115}
2116
2121
2123 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2124}
2125
2130
2132 return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
2133}
2134
2139
2140}
2141
2142
2143
2144
2145
2146namespace Exp {
2147
2153
2154
2164
2165
2168 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2169 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2170 Name = Val.Name;
2171 return true;
2172 }
2173 }
2174 return false;
2175}
2176
2178
2180 if (Val.MaxIndex == 0 && Name == Val.Name)
2181 return Val.Tgt;
2182
2183 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2184 StringRef Suffix = Name.drop_front(Val.Name.size());
2185
2186 unsigned Id;
2187 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2189
2190
2191 if (Suffix.size() > 1 && Suffix[0] == '0')
2193
2194 return Val.Tgt + Id;
2195 }
2196 }
2198}
2199
2201 switch (Id) {
2210 default:
2213 return true;
2214 }
2215}
2216
2217}
2218
2219
2220
2221
2222
2223namespace MTBUFFormat {
2224
2228 return Id;
2229 }
2231}
2232
2237
2245
2249 if (Name == lookupTable[Id])
2250 return Id;
2251 }
2253}
2254
2259
2261 unsigned Dfmt;
2262 unsigned Nfmt;
2265}
2266
2270
2274
2279
2284 return Id;
2285 }
2286 } else {
2289 return Id;
2290 }
2291 }
2293}
2294
2300
2304
2311 return Id;
2312 }
2313 } else {
2316 return Id;
2317 }
2318 }
2320}
2321
2325
2331
2332}
2333
2334
2335
2336
2337
2338namespace SendMsg {
2339
2343
2347
2349 bool Strict) {
2351
2352 if (!Strict)
2354
2357 return false;
2358
2360 }
2361
2363}
2364
2368
2369 if (!Strict)
2371
2373 switch (MsgId) {
2380 }
2381 }
2383}
2384
2390
2397
2402 OpId = 0;
2404 } else {
2407 }
2408}
2409
2413
2414}
2415
2416
2417
2418
2419
2421 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2422}
2423
2425
2426 return F.getFnAttributeAsParsedInteger(
2427 "amdgpu-color-export",
2429}
2430
2432 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2433}
2434
2437 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2438
2441
2442 return 0;
2443}
2444
2446 return STI.hasFeature(AMDGPU::FeatureXNACK);
2447}
2448
2450 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2451}
2452
2454 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2455 !STI.hasFeature(AMDGPU::FeatureR128A16);
2456}
2457
2459 return STI.hasFeature(AMDGPU::FeatureA16);
2460}
2461
2463 return STI.hasFeature(AMDGPU::FeatureG16);
2464}
2465
2467 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && (STI) &&
2469}
2470
2472 return STI.hasFeature(AMDGPU::FeatureGDS);
2473}
2474
2477 if (Version.Major == 10)
2478 return Version.Minor >= 3 ? 13 : 5;
2479 if (Version.Major == 11)
2480 return 5;
2481 if (Version.Major >= 12)
2482 return HasSampler ? 4 : 5;
2483 return 0;
2484}
2485
2488 return 32;
2489 return 16;
2490}
2491
2493 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2494}
2495
2497 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2498}
2499
2501 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2502}
2503
2505 return STI.hasFeature(AMDGPU::FeatureGFX9);
2506}
2507
2511
2515
2519
2523
2527
2529
2531 return STI.hasFeature(AMDGPU::FeatureGFX10);
2532}
2533
2537
2541
2543 return STI.hasFeature(AMDGPU::FeatureGFX11);
2544}
2545
2549
2553
2555
2557
2561
2567
2569
2573
2577
2579 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2580}
2581
2583 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2584}
2585
2587 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2588}
2589
2591 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2592}
2593
2597
2599 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2600}
2601
2603 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2604}
2605
2607 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2608}
2609
2611 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2612}
2613
2615 return STI.hasFeature(AMDGPU::FeatureVOPD);
2616}
2617
2619 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2620}
2621
2623 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2624}
2625
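// On gfx90a and newer, AGPRs and VGPRs share one register budget, so the AGPR
// count is added on top of the VGPR count (rounded up to a multiple of 4);
// otherwise the two files are separate and the larger count is used.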
                            int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
2632
2634 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2635 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2636 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2637 Reg == AMDGPU::SCC;
2638}
2639
2643
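// MAP_REG2REG expands to a switch mapping generation-independent register
// enums (FLAT_SCR, TTMPs, M0, SGPR_NULL, ...) to the variant used by a
// particular subtarget generation. The CASE_* helpers are defined below for
// that direction and then redefined further down for the reverse mapping.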
2644#define MAP_REG2REG \
2645 using namespace AMDGPU; \
2646 switch (Reg.id()) { \
2647 default: \
2648 return Reg; \
2649 CASE_CI_VI(FLAT_SCR) \
2650 CASE_CI_VI(FLAT_SCR_LO) \
2651 CASE_CI_VI(FLAT_SCR_HI) \
2652 CASE_VI_GFX9PLUS(TTMP0) \
2653 CASE_VI_GFX9PLUS(TTMP1) \
2654 CASE_VI_GFX9PLUS(TTMP2) \
2655 CASE_VI_GFX9PLUS(TTMP3) \
2656 CASE_VI_GFX9PLUS(TTMP4) \
2657 CASE_VI_GFX9PLUS(TTMP5) \
2658 CASE_VI_GFX9PLUS(TTMP6) \
2659 CASE_VI_GFX9PLUS(TTMP7) \
2660 CASE_VI_GFX9PLUS(TTMP8) \
2661 CASE_VI_GFX9PLUS(TTMP9) \
2662 CASE_VI_GFX9PLUS(TTMP10) \
2663 CASE_VI_GFX9PLUS(TTMP11) \
2664 CASE_VI_GFX9PLUS(TTMP12) \
2665 CASE_VI_GFX9PLUS(TTMP13) \
2666 CASE_VI_GFX9PLUS(TTMP14) \
2667 CASE_VI_GFX9PLUS(TTMP15) \
2668 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2669 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2670 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2671 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2672 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2673 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2674 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2675 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2676 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2677 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2678 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2679 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2680 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2681 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2682 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2683 CASE_VI_GFX9PLUS( \
2684 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2685 CASE_GFXPRE11_GFX11PLUS(M0) \
2686 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2687 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2688 }
2689
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: \
    return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2706
2712
#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

#define CASE_CI_VI(node) \
  case node##_ci: \
  case node##_vi: \
    return node;
#define CASE_VI_GFX9PLUS(node) \
  case node##_vi: \
  case node##_gfx9plus: \
    return node;
#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node##_gfx11plus: \
  case node##_gfxpre11: \
    return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2731
2733
2735 switch (Reg.id()) {
2736 case AMDGPU::SRC_SHARED_BASE_LO:
2737 case AMDGPU::SRC_SHARED_BASE:
2738 case AMDGPU::SRC_SHARED_LIMIT_LO:
2739 case AMDGPU::SRC_SHARED_LIMIT:
2740 case AMDGPU::SRC_PRIVATE_BASE_LO:
2741 case AMDGPU::SRC_PRIVATE_BASE:
2742 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2743 case AMDGPU::SRC_PRIVATE_LIMIT:
2744 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2745 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2746 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2747 return true;
2748 case AMDGPU::SRC_VCCZ:
2749 case AMDGPU::SRC_EXECZ:
2750 case AMDGPU::SRC_SCC:
2751 return true;
2752 case AMDGPU::SGPR_NULL:
2753 return true;
2754 default:
2755 return false;
2756 }
2757}
2758
2759#undef CASE_CI_VI
2760#undef CASE_VI_GFX9PLUS
2761#undef CASE_GFXPRE11_GFX11PLUS
2762#undef CASE_GFXPRE11_GFX11PLUS_TO
2763#undef MAP_REG2REG
2764
2767 unsigned OpType = Desc.operands()[OpNo].OperandType;
2770}
2771
2774 unsigned OpType = Desc.operands()[OpNo].OperandType;
2775 switch (OpType) {
2788 return true;
2789 default:
2790 return false;
2791 }
2792}
2793
2796 unsigned OpType = Desc.operands()[OpNo].OperandType;
2801}
2802
2803
2804
2806 switch (RCID) {
2807 case AMDGPU::VGPR_16RegClassID:
2808 case AMDGPU::VGPR_16_Lo128RegClassID:
2809 case AMDGPU::SGPR_LO16RegClassID:
2810 case AMDGPU::AGPR_LO16RegClassID:
2811 return 16;
2812 case AMDGPU::SGPR_32RegClassID:
2813 case AMDGPU::VGPR_32RegClassID:
2814 case AMDGPU::VGPR_32_Lo256RegClassID:
2815 case AMDGPU::VRegOrLds_32RegClassID:
2816 case AMDGPU::AGPR_32RegClassID:
2817 case AMDGPU::VS_32RegClassID:
2818 case AMDGPU::AV_32RegClassID:
2819 case AMDGPU::SReg_32RegClassID:
2820 case AMDGPU::SReg_32_XM0RegClassID:
2821 case AMDGPU::SRegOrLds_32RegClassID:
2822 return 32;
2823 case AMDGPU::SGPR_64RegClassID:
2824 case AMDGPU::VS_64RegClassID:
2825 case AMDGPU::SReg_64RegClassID:
2826 case AMDGPU::VReg_64RegClassID:
2827 case AMDGPU::AReg_64RegClassID:
2828 case AMDGPU::SReg_64_XEXECRegClassID:
2829 case AMDGPU::VReg_64_Align2RegClassID:
2830 case AMDGPU::AReg_64_Align2RegClassID:
2831 case AMDGPU::AV_64RegClassID:
2832 case AMDGPU::AV_64_Align2RegClassID:
2833 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2834 case AMDGPU::VS_64_Lo256RegClassID:
2835 return 64;
2836 case AMDGPU::SGPR_96RegClassID:
2837 case AMDGPU::SReg_96RegClassID:
2838 case AMDGPU::VReg_96RegClassID:
2839 case AMDGPU::AReg_96RegClassID:
2840 case AMDGPU::VReg_96_Align2RegClassID:
2841 case AMDGPU::AReg_96_Align2RegClassID:
2842 case AMDGPU::AV_96RegClassID:
2843 case AMDGPU::AV_96_Align2RegClassID:
2844 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2845 return 96;
2846 case AMDGPU::SGPR_128RegClassID:
2847 case AMDGPU::SReg_128RegClassID:
2848 case AMDGPU::VReg_128RegClassID:
2849 case AMDGPU::AReg_128RegClassID:
2850 case AMDGPU::VReg_128_Align2RegClassID:
2851 case AMDGPU::AReg_128_Align2RegClassID:
2852 case AMDGPU::AV_128RegClassID:
2853 case AMDGPU::AV_128_Align2RegClassID:
2854 case AMDGPU::SReg_128_XNULLRegClassID:
2855 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2856 return 128;
2857 case AMDGPU::SGPR_160RegClassID:
2858 case AMDGPU::SReg_160RegClassID:
2859 case AMDGPU::VReg_160RegClassID:
2860 case AMDGPU::AReg_160RegClassID:
2861 case AMDGPU::VReg_160_Align2RegClassID:
2862 case AMDGPU::AReg_160_Align2RegClassID:
2863 case AMDGPU::AV_160RegClassID:
2864 case AMDGPU::AV_160_Align2RegClassID:
2865 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2866 return 160;
2867 case AMDGPU::SGPR_192RegClassID:
2868 case AMDGPU::SReg_192RegClassID:
2869 case AMDGPU::VReg_192RegClassID:
2870 case AMDGPU::AReg_192RegClassID:
2871 case AMDGPU::VReg_192_Align2RegClassID:
2872 case AMDGPU::AReg_192_Align2RegClassID:
2873 case AMDGPU::AV_192RegClassID:
2874 case AMDGPU::AV_192_Align2RegClassID:
2875 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
2876 return 192;
2877 case AMDGPU::SGPR_224RegClassID:
2878 case AMDGPU::SReg_224RegClassID:
2879 case AMDGPU::VReg_224RegClassID:
2880 case AMDGPU::AReg_224RegClassID:
2881 case AMDGPU::VReg_224_Align2RegClassID:
2882 case AMDGPU::AReg_224_Align2RegClassID:
2883 case AMDGPU::AV_224RegClassID:
2884 case AMDGPU::AV_224_Align2RegClassID:
2885 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
2886 return 224;
2887 case AMDGPU::SGPR_256RegClassID:
2888 case AMDGPU::SReg_256RegClassID:
2889 case AMDGPU::VReg_256RegClassID:
2890 case AMDGPU::AReg_256RegClassID:
2891 case AMDGPU::VReg_256_Align2RegClassID:
2892 case AMDGPU::AReg_256_Align2RegClassID:
2893 case AMDGPU::AV_256RegClassID:
2894 case AMDGPU::AV_256_Align2RegClassID:
2895 case AMDGPU::SReg_256_XNULLRegClassID:
2896 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
2897 return 256;
2898 case AMDGPU::SGPR_288RegClassID:
2899 case AMDGPU::SReg_288RegClassID:
2900 case AMDGPU::VReg_288RegClassID:
2901 case AMDGPU::AReg_288RegClassID:
2902 case AMDGPU::VReg_288_Align2RegClassID:
2903 case AMDGPU::AReg_288_Align2RegClassID:
2904 case AMDGPU::AV_288RegClassID:
2905 case AMDGPU::AV_288_Align2RegClassID:
2906 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
2907 return 288;
2908 case AMDGPU::SGPR_320RegClassID:
2909 case AMDGPU::SReg_320RegClassID:
2910 case AMDGPU::VReg_320RegClassID:
2911 case AMDGPU::AReg_320RegClassID:
2912 case AMDGPU::VReg_320_Align2RegClassID:
2913 case AMDGPU::AReg_320_Align2RegClassID:
2914 case AMDGPU::AV_320RegClassID:
2915 case AMDGPU::AV_320_Align2RegClassID:
2916 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
2917 return 320;
2918 case AMDGPU::SGPR_352RegClassID:
2919 case AMDGPU::SReg_352RegClassID:
2920 case AMDGPU::VReg_352RegClassID:
2921 case AMDGPU::AReg_352RegClassID:
2922 case AMDGPU::VReg_352_Align2RegClassID:
2923 case AMDGPU::AReg_352_Align2RegClassID:
2924 case AMDGPU::AV_352RegClassID:
2925 case AMDGPU::AV_352_Align2RegClassID:
2926 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
2927 return 352;
2928 case AMDGPU::SGPR_384RegClassID:
2929 case AMDGPU::SReg_384RegClassID:
2930 case AMDGPU::VReg_384RegClassID:
2931 case AMDGPU::AReg_384RegClassID:
2932 case AMDGPU::VReg_384_Align2RegClassID:
2933 case AMDGPU::AReg_384_Align2RegClassID:
2934 case AMDGPU::AV_384RegClassID:
2935 case AMDGPU::AV_384_Align2RegClassID:
2936 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
2937 return 384;
2938 case AMDGPU::SGPR_512RegClassID:
2939 case AMDGPU::SReg_512RegClassID:
2940 case AMDGPU::VReg_512RegClassID:
2941 case AMDGPU::AReg_512RegClassID:
2942 case AMDGPU::VReg_512_Align2RegClassID:
2943 case AMDGPU::AReg_512_Align2RegClassID:
2944 case AMDGPU::AV_512RegClassID:
2945 case AMDGPU::AV_512_Align2RegClassID:
2946 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
2947 return 512;
2948 case AMDGPU::SGPR_1024RegClassID:
2949 case AMDGPU::SReg_1024RegClassID:
2950 case AMDGPU::VReg_1024RegClassID:
2951 case AMDGPU::AReg_1024RegClassID:
2952 case AMDGPU::VReg_1024_Align2RegClassID:
2953 case AMDGPU::AReg_1024_Align2RegClassID:
2954 case AMDGPU::AV_1024RegClassID:
2955 case AMDGPU::AV_1024_Align2RegClassID:
2956 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
2957 return 1024;
2958 default:
2960 }
2961}
2962
2966
2969 return true;
2970
2981 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2982}
2983
2986 return true;
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
3007 (Val == 0x3e22f983 && HasInv2Pi);
3008}
3009
3011 if (!HasInv2Pi)
3012 return false;
3014 return true;
3016 return Val == 0x3F00 ||
3017 Val == 0xBF00 ||
3018 Val == 0x3F80 ||
3019 Val == 0xBF80 ||
3020 Val == 0x4000 ||
3021 Val == 0xC000 ||
3022 Val == 0x4080 ||
3023 Val == 0xC080 ||
3024 Val == 0x3E22;
3025}
3026
3030
3032 if (!HasInv2Pi)
3033 return false;
3035 return true;
3037 return Val == 0x3C00 ||
3038 Val == 0xBC00 ||
3039 Val == 0x3800 ||
3040 Val == 0xB800 ||
3041 Val == 0x4000 ||
3042 Val == 0xC000 ||
3043 Val == 0x4400 ||
3044 Val == 0xC400 ||
3045 Val == 0x3118;
3046}
3047
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3061 return 128 + Signed;
3062
3064 return 192 + std::abs(Signed);
3065
3066 if (IsFloat) {
3067
3069 case 0x3800: return 240;
3070 case 0xB800: return 241;
3071 case 0x3C00: return 242;
3072 case 0xBC00: return 243;
3073 case 0x4000: return 244;
3074 case 0xC000: return 245;
3075 case 0x4400: return 246;
3076 case 0xC400: return 247;
3077 case 0x3118: return 248;
3078 default: break;
3079 }
3080
3081 } else {
3082
3084 case 0x3F000000: return 240;
3085 case 0xBF000000: return 241;
3086 case 0x3F800000: return 242;
3087 case 0xBF800000: return 243;
3088 case 0x40000000: return 244;
3089 case 0xC0000000: return 245;
3090 case 0x40800000: return 246;
3091 case 0xC0800000: return 247;
3092 case 0x3E22F983: return 248;
3093 default: break;
3094 }
3095
3096 }
3097
3098 return {};
3099}
3100
3101
3102
3106
3107
3108
3112 return 128 + Signed;
3113
3115 return 192 + std::abs(Signed);
3116
3117
3119 case 0x3F00: return 240;
3120 case 0xBF00: return 241;
3121 case 0x3F80: return 242;
3122 case 0xBF80: return 243;
3123 case 0x4000: return 244;
3124 case 0xC000: return 245;
3125 case 0x4080: return 246;
3126 case 0xC080: return 247;
3127 case 0x3E22: return 248;
3128 default: break;
3129 }
3130
3131
3132 return std::nullopt;
3133}
3134
3135
3136
3140
3141
3143 switch (OpType) {
3154 return false;
3155 default:
3157 }
3158}
3159
3160
3164
3165
3169
3170
3174
3176 if (IsFP64)
3177 return (Val);
3178
3180}
3181
3183 switch (Type) {
3184 default:
3185 break;
3190 return Imm & 0xffff;
3206 }
3207 return Imm;
3208}
3209
3212
3213
3215 switch (CC) {
3218 return true;
3229
3230
3231 return A->hasAttribute(Attribute::InReg) ||
3232 A->hasAttribute(Attribute::ByVal);
3233 default:
3234
3235 return A->hasAttribute(Attribute::InReg);
3236 }
3237}
3238
3240
3242 switch (CC) {
3245 return true;
3256
3257
3258 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3260 default:
3261 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3262 }
3263}
3264
3268
3270 int64_t EncodedOffset) {
3273
3276}
3277
3279 int64_t EncodedOffset, bool IsBuffer) {
3281 if (IsBuffer && EncodedOffset < 0)
3282 return false;
3283 return isInt<24>(EncodedOffset);
3284 }
3285
3287}
3288
3290 return (ByteOffset & 3) == 0;
3291}
3292
3296 return ByteOffset;
3297
3299 return ByteOffset >> 2;
3300}
3301
3303 int64_t ByteOffset, bool IsBuffer,
3304 bool HasSOffset) {
3305
3306
3307
3309 return std::nullopt;
3310
3311 if (isGFX12Plus(ST))
3312 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3313 : std::nullopt;
3314
3315
3318 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3319 : std::nullopt;
3320 }
3321
3323 return std::nullopt;
3324
3327 ? std::optional<int64_t>(EncodedOffset)
3328 : std::nullopt;
3329}
3330
3332 int64_t ByteOffset) {
3334 return std::nullopt;
3335
3337 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3338 : std::nullopt;
3339}
3340
3343 return 12;
3344
3346 return 24;
3347 return 13;
3348}
3349
namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

}
3371
3373 return lookupSourceOfDivergence(IntrID);
3374}
3375
3377 return lookupAlwaysUniform(IntrID);
3378}
3379
3384 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3385 BitsPerComp, NumComponents, NumFormat)
3387 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3388 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3389}
3390
3394 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3395 : getGfx9BufferFormatInfo(Format);
3396}
3397
3400 const unsigned VGPRClasses[] = {
3401 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3402 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3403 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3404 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3405 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3406 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3407 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3408 AMDGPU::VReg_1024RegClassID};
3409
3410 for (unsigned RCID : VGPRClasses) {
3413 return &RC;
3414 }
3415
3416 return nullptr;
3417}
3418
3420 unsigned Enc = MRI.getEncodingValue(Reg);
3422 return Idx >> 8;
3423}
3424
3427 unsigned Enc = MRI.getEncodingValue(Reg);
3429 if (Idx >= 0x100)
3431
3433 if (!RC)
3435
3436 Idx |= MSBs << 8;
3437 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3438
3439 Idx *= 2;
3441 ++Idx;
3442 }
3443
3445}
3446
3447std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3449 static const AMDGPU::OpName VOPOps[4] = {
3450 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3451 AMDGPU::OpName::vdst};
3452 static const AMDGPU::OpName VDSOps[4] = {
3453 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3454 AMDGPU::OpName::vdst};
3455 static const AMDGPU::OpName FLATOps[4] = {
3456 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3457 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3458 static const AMDGPU::OpName BUFOps[4] = {
3459 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3460 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3461 static const AMDGPU::OpName VIMGOps[4] = {
3462 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3463 AMDGPU::OpName::vdata};
3464
3465
3466
3467
3468 static const AMDGPU::OpName VOPDOpsX[4] = {
3469 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3470 AMDGPU::OpName::vdstX};
3471 static const AMDGPU::OpName VOPDOpsY[4] = {
3472 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3473 AMDGPU::OpName::vdstY};
3474
3475
3476 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3477 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3478 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3479 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3480 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3481 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3482 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3483 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3484 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3485
3486 unsigned TSFlags = Desc.TSFlags;
3487
3488 if (TSFlags &
3491 switch (Desc.getOpcode()) {
3492
3493 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3494 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3495 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3496 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3497 return {};
3498 case AMDGPU::V_FMAMK_F16:
3499 case AMDGPU::V_FMAMK_F16_t16:
3500 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3501 case AMDGPU::V_FMAMK_F16_fake16:
3502 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3503 case AMDGPU::V_FMAMK_F32:
3504 case AMDGPU::V_FMAMK_F32_gfx12:
3505 case AMDGPU::V_FMAMK_F64:
3506 case AMDGPU::V_FMAMK_F64_gfx1250:
3507 return {VOP2MADMKOps, nullptr};
3508 default:
3509 break;
3510 }
3511 return {VOPOps, nullptr};
3512 }
3513
3515 return {VDSOps, nullptr};
3516
3518 return {FLATOps, nullptr};
3519
3521 return {BUFOps, nullptr};
3522
3524 return {VIMGOps, nullptr};
3525
3528 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3529 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3530 }
3531
3533
3535 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3536 " these instructions are not expected on gfx1250");
3537
3538 return {};
3539}
3540
3543
3547 return false;
3548
3549
3552
3553
3556
3557 return false;
3558}
3559
3562 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3563 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3564 if (Idx == -1)
3565 continue;
3566
3570 if (RegClass == AMDGPU::VReg_64RegClassID ||
3571 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3572 return true;
3573 }
3574
3575 return false;
3576}
3577
3579 switch (Opc) {
3580 case AMDGPU::V_MUL_LO_U32_e64:
3581 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3582 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3583 case AMDGPU::V_MUL_HI_U32_e64:
3584 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3585 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3586 case AMDGPU::V_MUL_HI_I32_e64:
3587 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3588 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3589 case AMDGPU::V_MAD_U32_e64:
3590 case AMDGPU::V_MAD_U32_e64_dpp:
3591 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3592 return true;
3593 default:
3594 return false;
3595 }
3596}
3597
3600 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3601 return false;
3602
3604 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3605
3607}
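// --- Editorial sketch, not part of the original source ---
// isDPALU_DPP() combines the FeatureDPALU_DPP check above with opcode and
// operand-width queries (parts of its body are elided in this listing). A
// pass or parser deciding whether a DPP form is legal on the current
// subtarget might gate on it directly; DppOpcode below is assumed.
//
//   if (!AMDGPU::isDPALU_DPP(MII.get(DppOpcode), MII, ST))
//     return false; // DPALU DPP form not supported on this subtarget
// ----------------------------------------------------------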
3608
3609 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3610 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3611 return 64;
3612 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3613 return 128;
3614 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3615 return 320;
3616 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3617 return 512;
3618 return 64;
3619}
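// --- Editorial sketch, not part of the original source ---
// getLdsDwGranularity() reports the LDS allocation granule in dwords.
// Assuming the usual dword-to-byte conversion, rounding an LDS byte size up
// to the hardware granule could look like this (LdsBytes is assumed):
//
//   unsigned GranuleBytes = AMDGPU::getLdsDwGranularity(ST) * 4;
//   uint64_t AllocBytes = llvm::alignTo(LdsBytes, GranuleBytes);
// ----------------------------------------------------------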
3620
3621 bool isPackedFP32Inst(unsigned Opc) {
3622 switch (Opc) {
3623 case AMDGPU::V_PK_ADD_F32:
3624 case AMDGPU::V_PK_ADD_F32_gfx12:
3625 case AMDGPU::V_PK_MUL_F32:
3626 case AMDGPU::V_PK_MUL_F32_gfx12:
3627 case AMDGPU::V_PK_FMA_F32:
3628 case AMDGPU::V_PK_FMA_F32_gfx12:
3629 return true;
3630 default:
3631 return false;
3632 }
3633}
3634
3639
3643
3646 return "";
3648 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3649 return Buffer.c_str();
3650 }
3652 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3653 << EncoVariableDims;
3654 return Buffer.c_str();
3655 }
3657 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3658 return Buffer.c_str();
3659 }
3660 }
3662}
3663
3665 std::optional<SmallVector<unsigned>> Attr =
3668
3669 if (!Attr.has_value())
3671 else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; }))
3673 else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; }))
3675
3678 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3679
3680 return A;
3681}
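// --- Editorial sketch, not part of the original source ---
// Reading the cluster-dims attribute back from a function with only the
// accessors declared on ClusterDimsAttr; the "no cluster" and "variable
// dims" kinds are represented by the sentinel encodings handled above.
//
//   AMDGPU::ClusterDimsAttr Attr = AMDGPU::ClusterDimsAttr::get(F);
//   errs() << "cluster dims: " << Attr.to_string() << '\n';
//   const std::array<unsigned, 3> &Dims = Attr.getDims(); // fixed dims, when present
// ----------------------------------------------------------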
3682
3683}
3684
3687 switch (S) {
3689 OS << "Unsupported";
3690 break;
3692 OS << "Any";
3693 break;
3695 OS << "Off";
3696 break;
3698 OS << "On";
3699 break;
3700 }
3701 return OS;
3702}
3703
3704}