LLVM: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
30#include
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
39 llvm:🆑:desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
42namespace {
43
44
/// \returns a mask with \p Width consecutive bits set, the lowest of which is
/// at bit position \p Shift.
///
/// All arithmetic is performed on unsigned values so that wide fields never
/// trigger signed-overflow or over-wide-shift undefined behaviour:
///  - a \p Width that covers the whole word (or more) produces an all-ones
///    field before it is shifted;
///  - a \p Shift at or beyond the word size produces an empty mask;
///  - bits shifted past the top of the word are discarded.
/// For fields that fit inside the word the result is the same as the plain
/// `((1 << Width) - 1) << Shift` formula.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Number of value bits in `unsigned`; assumes 8-bit bytes.
  const unsigned WordBits = 8 * sizeof(unsigned);

  // Shifting by the full word width (or more) is undefined, so handle it here.
  if (Shift >= WordBits)
    return 0;

  // `1u << Width` is only defined for Width < WordBits.
  const unsigned Field = Width >= WordBits ? ~0u : (1u << Width) - 1u;
  return Field << Shift;
}
48
49
50
51
52unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
53 unsigned Mask = getBitMask(Shift, Width);
54 return ((Src << Shift) & Mask) | (Dst & ~Mask);
55}
56
57
58
59
60unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
61 return (Src & getBitMask(Shift, Width)) >> Shift;
62}
63
64
65unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
67}
68
69
70unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
72}
73
74
75unsigned getExpcntBitShift(unsigned VersionMajor) {
77}
78
79
/// \returns the width in bits of the Expcnt field. The value does not depend
/// on \p VersionMajor.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  const unsigned Width = 3;
  return Width;
}
81
82
83unsigned getLgkmcntBitShift(unsigned VersionMajor) {
85}
86
87
88unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
90}
91
92
/// \returns the bit position of the high part of the Vmcnt field. The value
/// does not depend on \p VersionMajor.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  const unsigned Shift = 14;
  return Shift;
}
94
95
/// \returns the width in bits of the high part of the Vmcnt field: 2 when
/// \p VersionMajor is 9 or 10, and 0 (no high part) for every other version.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
99
100
101unsigned getLoadcntBitWidth(unsigned VersionMajor) {
103}
104
105
106unsigned getSamplecntBitWidth(unsigned VersionMajor) {
108}
109
110
111unsigned getBvhcntBitWidth(unsigned VersionMajor) {
113}
114
115
116unsigned getDscntBitWidth(unsigned VersionMajor) {
118}
119
120
/// \returns the bit position of the Dscnt field. The value does not depend on
/// \p VersionMajor.
unsigned getDscntBitShift(unsigned VersionMajor) {
  const unsigned Shift = 0;
  return Shift;
}
122
123
124unsigned getStorecntBitWidth(unsigned VersionMajor) {
126}
127
128
129unsigned getKmcntBitWidth(unsigned VersionMajor) {
131}
132
133
134unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
136}
137
138
139unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
141}
142
143
/// \returns the width in bits of the VaSdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaSdstBitWidth() {
  const unsigned Width = 3;
  return Width;
}
145
146
/// \returns the bit position of the VaSdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaSdstBitShift() {
  const unsigned Shift = 9;
  return Shift;
}
148
149
/// \returns the width in bits of the VmVsrc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVmVsrcBitWidth() {
  const unsigned Width = 3;
  return Width;
}
151
152
/// \returns the bit position of the VmVsrc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVmVsrcBitShift() {
  const unsigned Shift = 2;
  return Shift;
}
154
155
/// \returns the width in bits of the VaVdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaVdstBitWidth() {
  const unsigned Width = 4;
  return Width;
}
157
158
/// \returns the bit position of the VaVdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaVdstBitShift() {
  const unsigned Shift = 12;
  return Shift;
}
160
161
/// \returns the width in bits of the VaVcc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaVccBitWidth() {
  const unsigned Width = 1;
  return Width;
}
163
164
/// \returns the bit position of the VaVcc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaVccBitShift() {
  const unsigned Shift = 1;
  return Shift;
}
166
167
/// \returns the width in bits of the SaSdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getSaSdstBitWidth() {
  const unsigned Width = 1;
  return Width;
}
169
170
/// \returns the bit position of the SaSdst field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getSaSdstBitShift() {
  const unsigned Shift = 0;
  return Shift;
}
172
173
/// \returns the width in bits of the VaSsrc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaSsrcBitWidth() {
  const unsigned Width = 1;
  return Width;
}
175
176
/// \returns the bit position of the VaSsrc field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getVaSsrcBitShift() {
  const unsigned Shift = 8;
  return Shift;
}
178
179
/// \returns the width in bits of the HoldCnt field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getHoldCntWidth() {
  const unsigned Width = 1;
  return Width;
}
181
182
/// \returns the bit position of the HoldCnt field, as used by the DepCtr
/// encode/decode helpers in this file.
inline unsigned getHoldCntBitShift() {
  const unsigned Shift = 7;
  return Shift;
}
184
185}
186
187namespace llvm {
188
190
191
192
196
197
201
204 M.getModuleFlag("amdhsa_code_object_version"))) {
205 return (unsigned)Ver->getZExtValue() / 100;
206 }
207
209}
210
214
216 switch (ABIVersion) {
218 return 4;
220 return 5;
222 return 6;
223 default:
225 }
226}
227
230 return 0;
231
232 switch (CodeObjectVersion) {
233 case 4:
235 case 5:
237 case 6:
239 default:
241 Twine(CodeObjectVersion));
242 }
243}
244
246 switch (CodeObjectVersion) {
248 return 48;
251 default:
253 }
254}
255
256
257
259 switch (CodeObjectVersion) {
261 return 24;
264 default:
266 }
267}
268
270 switch (CodeObjectVersion) {
272 return 32;
275 default:
277 }
278}
279
281 switch (CodeObjectVersion) {
283 return 40;
286 default:
288 }
289}
290
291#define GET_MIMGBaseOpcodesTable_IMPL
292#define GET_MIMGDimInfoTable_IMPL
293#define GET_MIMGInfoTable_IMPL
294#define GET_MIMGLZMappingTable_IMPL
295#define GET_MIMGMIPMappingTable_IMPL
296#define GET_MIMGBiasMappingTable_IMPL
297#define GET_MIMGOffsetMappingTable_IMPL
298#define GET_MIMGG16MappingTable_IMPL
299#define GET_MAIInstInfoTable_IMPL
300#define GET_WMMAInstInfoTable_IMPL
301#include "AMDGPUGenSearchableTables.inc"
302
304 unsigned VDataDwords, unsigned VAddrDwords) {
306 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
307 return Info ? Info->Opcode : -1;
308}
309
314
320 return NewInfo ? NewInfo->Opcode : -1;
321}
322
325 bool IsG16Supported) {
326 unsigned AddrWords = BaseOpcode->NumExtraArgs;
329 if (IsA16)
330 AddrWords += divideCeil(AddrComponents, 2);
331 else
332 AddrWords += AddrComponents;
333
334
335
336
337
338
340 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
341
342
343
345 else
347 }
348 return AddrWords;
349}
350
361
370
375
380
384
388
392
399
407
412
413#define GET_FP4FP8DstByteSelTable_DECL
414#define GET_FP4FP8DstByteSelTable_IMPL
415
420
426
427#define GET_MTBUFInfoTable_DECL
428#define GET_MTBUFInfoTable_IMPL
429#define GET_MUBUFInfoTable_DECL
430#define GET_MUBUFInfoTable_IMPL
431#define GET_SMInfoTable_DECL
432#define GET_SMInfoTable_IMPL
433#define GET_VOP1InfoTable_DECL
434#define GET_VOP1InfoTable_IMPL
435#define GET_VOP2InfoTable_DECL
436#define GET_VOP2InfoTable_IMPL
437#define GET_VOP3InfoTable_DECL
438#define GET_VOP3InfoTable_IMPL
439#define GET_VOPC64DPPTable_DECL
440#define GET_VOPC64DPPTable_IMPL
441#define GET_VOPC64DPP8Table_DECL
442#define GET_VOPC64DPP8Table_IMPL
443#define GET_VOPCAsmOnlyInfoTable_DECL
444#define GET_VOPCAsmOnlyInfoTable_IMPL
445#define GET_VOP3CAsmOnlyInfoTable_DECL
446#define GET_VOP3CAsmOnlyInfoTable_IMPL
447#define GET_VOPDComponentTable_DECL
448#define GET_VOPDComponentTable_IMPL
449#define GET_VOPDPairs_DECL
450#define GET_VOPDPairs_IMPL
451#define GET_VOPTrue16Table_DECL
452#define GET_VOPTrue16Table_IMPL
453#define GET_True16D16Table_IMPL
454#define GET_WMMAOpcode2AddrMappingTable_DECL
455#define GET_WMMAOpcode2AddrMappingTable_IMPL
456#define GET_WMMAOpcode3AddrMappingTable_DECL
457#define GET_WMMAOpcode3AddrMappingTable_IMPL
458#define GET_getMFMA_F8F6F4_WithSize_DECL
459#define GET_getMFMA_F8F6F4_WithSize_IMPL
460#define GET_isMFMA_F8F6F4Table_IMPL
461#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
462
463#include "AMDGPUGenSearchableTables.inc"
464
467 return Info ? Info->BaseOpcode : -1;
468}
469
472 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
473 return Info ? Info->Opcode : -1;
474}
475
478 return Info ? Info->elements : 0;
479}
480
483 return Info && Info->has_vaddr;
484}
485
488 return Info && Info->has_srsrc;
489}
490
493 return Info && Info->has_soffset;
494}
495
498 return Info ? Info->BaseOpcode : -1;
499}
500
503 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
504 return Info ? Info->Opcode : -1;
505}
506
509 return Info ? Info->elements : 0;
510}
511
514 return Info && Info->has_vaddr;
515}
516
519 return Info && Info->has_srsrc;
520}
521
524 return Info && Info->has_soffset;
525}
526
529 return Info && Info->IsBufferInv;
530}
531
536
541
544 return || Info->IsSingle;
545}
546
549 return || Info->IsSingle;
550}
551
554 return || Info->IsSingle;
555}
556
558 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
559}
560
562
567
570 return Info && Info->is_gfx940_xdl;
571}
572
575 return Info ? Info->is_wmma_xdl : false;
576}
577
579 switch (EncodingVal) {
582 return 6;
584 return 4;
587 default:
588 return 8;
589 }
590
592}
593
595 unsigned BLGP,
596 unsigned F8F8Opcode) {
599 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
600}
601
603 switch (Fmt) {
606 return 16;
609 return 12;
611 return 8;
612 }
613
615}
616
618 unsigned FmtB,
619 unsigned F8F8Opcode) {
622 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
623}
624
626 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
628 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
630 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
632 llvm_unreachable("Subtarget generation does not support VOPD!");
633}
634
636 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
637 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
640
641
642
643
646 EncodingFamily, VOPD3) != -1;
647 return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
648 }
649
650 return {false, false};
651}
652
654 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
655 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
657 return Info ? Info->VOPDOp : ~0u;
658}
659
663
665 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
666 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
667 Opc == AMDGPU::V_MAC_F32_e64_vi ||
668 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
669 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
670 Opc == AMDGPU::V_MAC_F16_e64_vi ||
671 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
672 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
673 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
674 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
675 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
676 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
677 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
678 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
679 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
680 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
681 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
682 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
683 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
684 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
685 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
686 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
687 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
688 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
689}
690
692 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
693 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
694 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
695 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
696 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
697 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
698 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
699 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
700}
701
703 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
704 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
705 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
706 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
707 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
708 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
709 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
710 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
711 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
712 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
713}
714
716 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
717 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
718 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
719 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
720 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
721 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
722 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
723 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
724 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
725 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
726 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
727 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
728 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
729 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
730 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
731 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
732 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
733 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
734 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
735}
736
738 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
739 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
740 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
741 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
742 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
743 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
744 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
745 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
746}
747
749 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
750 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
751}
752
767
772
777 if (Info->HasFP8DstByteSel)
779 if (Info->HasFP4DstByteSel)
781
783}
784
787 return Info ? Info->Opcode3Addr : ~0u;
788}
789
792 return Info ? Info->Opcode2Addr : ~0u;
793}
794
795
796
797
799 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
800}
801
803 switch (Opc) {
804 default:
805 return 0;
806 case AMDGPU::V_AND_B32_e32:
807 return 0x40;
808 case AMDGPU::V_OR_B32_e32:
809 return 0x54;
810 case AMDGPU::V_XOR_B32_e32:
811 return 0x14;
812 case AMDGPU::V_XNOR_B32_e32:
813 return 0x41;
814 }
815}
816
817int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
818 bool VOPD3) {
819 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
820 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
822 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
823 return Info ? Info->Opcode : -1;
824}
825
827 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
829 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
830 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
832 return {OpX->BaseVOP, OpY->BaseVOP};
833}
834
836
839
844 HasSrc2Acc = TiedIdx != -1;
846
851 : 1;
853
854 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
855 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
856
857
858 NumVOPD3Mods = 2;
859 if (IsVOP3)
860 SrcOperandsNum = 3;
862 getNamedOperandIdx(Opcode, OpName::src0))) {
863
864
865 NumVOPD3Mods = SrcOperandsNum;
866 if (HasSrc2Acc)
867 --NumVOPD3Mods;
868 }
869
871 return;
872
874 unsigned CompOprIdx;
875 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
877 MandatoryLiteralIdx = CompOprIdx;
878 break;
879 }
880 }
881}
882
884 return getNamedOperandIdx(Opcode, OpName::bitop3);
885}
886
889
892
896
897
898 return 0;
899}
900
902 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
904 bool VOPD3) const {
905
910
912 unsigned BanksMask) -> bool {
915 if (!BaseX)
916 BaseX = X;
917 if (!BaseY)
918 BaseY = Y;
919 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
920 return true;
921 if (BaseX != X &&
922 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
923 return true;
924 if (BaseY != Y &&
925 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
926 return true;
927
928
929
930 return false;
931 };
932
933 unsigned CompOprIdx;
937 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
938 continue;
939
942 return CompOprIdx;
943
945 continue;
946
948
949
950 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
951 return CompOprIdx;
952 if (VOPD3)
953 continue;
954 }
955
956 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
958 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
959 return CompOprIdx;
960 }
961
962 return {};
963}
964
965
966
967
968
969
970
971
973InstInfo::getRegIndices(unsigned CompIdx,
974 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
975 bool VOPD3) const {
977
978 const auto &Comp = CompInfo[CompIdx];
980
981 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
982
983 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
984 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
986 Comp.hasRegSrcOperand(CompSrcIdx)
987 ? GetRegIdx(CompIdx,
988 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
990 }
992}
993
994}
995
999
1003 const auto &OpXDesc = InstrInfo->get(OpX);
1004 const auto &OpYDesc = InstrInfo->get(OpY);
1009}
1010
1011namespace IsaInfo {
1012
1016 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1018 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1020}
1021
1023
1024
1025
1027 std::optional XnackRequested;
1028 std::optional SramEccRequested;
1029
1030 for (const std::string &Feature : Features.getFeatures()) {
1031 if (Feature == "+xnack")
1032 XnackRequested = true;
1033 else if (Feature == "-xnack")
1034 XnackRequested = false;
1035 else if (Feature == "+sramecc")
1036 SramEccRequested = true;
1037 else if (Feature == "-sramecc")
1038 SramEccRequested = false;
1039 }
1040
1043
1044 if (XnackRequested) {
1045 if (XnackSupported) {
1046 XnackSetting =
1048 } else {
1049
1050
1051 if (*XnackRequested) {
1052 errs() << "warning: xnack 'On' was requested for a processor that does "
1053 "not support it!\n";
1054 } else {
1055 errs() << "warning: xnack 'Off' was requested for a processor that "
1056 "does not support it!\n";
1057 }
1058 }
1059 }
1060
1061 if (SramEccRequested) {
1062 if (SramEccSupported) {
1063 SramEccSetting =
1065 } else {
1066
1067
1068
1069 if (*SramEccRequested) {
1070 errs() << "warning: sramecc 'On' was requested for a processor that "
1071 "does not support it!\n";
1072 } else {
1073 errs() << "warning: sramecc 'Off' was requested for a processor that "
1074 "does not support it!\n";
1075 }
1076 }
1077 }
1078}
1079
1089
1092 TargetID.split(TargetIDSplit, ':');
1093
1094 for (const auto &FeatureString : TargetIDSplit) {
1095 if (FeatureString.starts_with("xnack"))
1097 if (FeatureString.starts_with("sramecc"))
1099 }
1100}
1101
1103 std::string StringRep;
1105
1106 auto TargetTriple = STI.getTargetTriple();
1108
1109 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1110 << '-' << TargetTriple.getOSName() << '-'
1111 << TargetTriple.getEnvironmentName() << '-';
1112
1113 std::string Processor;
1114
1115
1116
1118 Processor = STI.getCPU().str();
1119 else
1122 .str();
1123
1124 std::string Features;
1125 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
1126
1128 Features += ":sramecc-";
1130 Features += ":sramecc+";
1131
1133 Features += ":xnack-";
1135 Features += ":xnack+";
1136 }
1137
1138 StreamRep << Processor << Features;
1139
1140 return StringRep;
1141}
1142
1145 return 16;
1147 return 32;
1148
1149 return 64;
1150}
1151
1154
1155
1156
1157
1159 BytesPerCU *= 2;
1160
1161 return BytesPerCU;
1162}
1163
1165 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1166 return 32768;
1167 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1168 return 65536;
1169 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1170 return 163840;
1171 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1172 return 327680;
1173 return 32768;
1174}
1175
1177
1178
1179
1180
1183 return 4;
1184 }
1185
1186
1187
1189 return 2;
1190
1191
1192
1193 return 4;
1194}
1195
1197 unsigned FlatWorkGroupSize) {
1198 assert(FlatWorkGroupSize != 0);
1200 return 8;
1203 if (N == 1) {
1204
1205 return MaxWaves;
1206 }
1207
1208 unsigned MaxBarriers = 16;
1210 MaxBarriers = 32;
1211
1212 return std::min(MaxWaves / N, MaxBarriers);
1213}
1214
1216
1218
1220 return 8;
1222 return 10;
1224}
1225
1227 unsigned FlatWorkGroupSize) {
1230}
1231
1233
1235
1236 return 1024;
1237}
1238
1240 unsigned FlatWorkGroupSize) {
1242}
1243
1246 if (Version.Major >= 10)
1249 return 16;
1250 return 8;
1251}
1252
1254
1258 return 800;
1259 return 512;
1260}
1261
1265
1267 if (Version.Major >= 10)
1268 return 106;
1270 return 102;
1271 return 104;
1272}
1273
1275 assert(WavesPerEU != 0);
1276
1278 if (Version.Major >= 10)
1279 return 0;
1280
1282 return 0;
1283
1284 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1286 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1289}
1290
1292 bool Addressable) {
1293 assert(WavesPerEU != 0);
1294
1297 if (Version.Major >= 10)
1298 return Addressable ? AddressableNumSGPRs : 108;
1299 if (Version.Major >= 8 && !Addressable)
1300 AddressableNumSGPRs = 112;
1303 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1305 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1306}
1307
1309 bool FlatScrUsed, bool XNACKUsed) {
1310 unsigned ExtraSGPRs = 0;
1311 if (VCCUsed)
1312 ExtraSGPRs = 2;
1313
1315 if (Version.Major >= 10)
1316 return ExtraSGPRs;
1317
1318 if (Version.Major < 8) {
1319 if (FlatScrUsed)
1320 ExtraSGPRs = 4;
1321 } else {
1322 if (XNACKUsed)
1323 ExtraSGPRs = 4;
1324
1325 if (FlatScrUsed ||
1327 ExtraSGPRs = 6;
1328 }
1329
1330 return ExtraSGPRs;
1331}
1332
1334 bool FlatScrUsed) {
1337}
1338
1340 unsigned Granule) {
1341 return divideCeil(std::max(1u, NumRegs), Granule);
1342}
1343
1349
1351 unsigned DynamicVGPRBlockSize,
1352 std::optional EnableWavefrontSize32) {
1354 return 8;
1355
1356 if (DynamicVGPRBlockSize != 0)
1357 return DynamicVGPRBlockSize;
1358
1359 bool IsWave32 = EnableWavefrontSize32
1360 ? *EnableWavefrontSize32
1362
1364 return IsWave32 ? 24 : 12;
1365
1367 return IsWave32 ? 16 : 8;
1368
1369 return IsWave32 ? 8 : 4;
1370}
1371
1373 std::optional EnableWavefrontSize32) {
1375 return 8;
1376
1377 bool IsWave32 = EnableWavefrontSize32
1378 ? *EnableWavefrontSize32
1380
1382 return IsWave32 ? 16 : 8;
1383
1384 return IsWave32 ? 8 : 4;
1385}
1386
1388
1391 return 512;
1393 return 256;
1396 return IsWave32 ? 1536 : 768;
1397 return IsWave32 ? 1024 : 512;
1398}
1399
1402 if (Features.test(Feature1024AddressableVGPRs))
1403 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1404 return 256;
1405}
1406
1408 unsigned DynamicVGPRBlockSize) {
1410 if (Features.test(FeatureGFX90AInsts))
1411 return 512;
1412
1413 if (DynamicVGPRBlockSize != 0)
1414
1417}
1418
1420 unsigned NumVGPRs,
1421 unsigned DynamicVGPRBlockSize) {
1425}
1426
1428 unsigned MaxWaves,
1429 unsigned TotalNumVGPRs) {
1430 if (NumVGPRs < Granule)
1431 return MaxWaves;
1432 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1433 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1434}
1435
1439 return MaxWaves;
1440
1442 if (SGPRs <= 80)
1443 return 10;
1444 if (SGPRs <= 88)
1445 return 9;
1446 if (SGPRs <= 100)
1447 return 8;
1448 return 7;
1449 }
1450 if (SGPRs <= 48)
1451 return 10;
1452 if (SGPRs <= 56)
1453 return 9;
1454 if (SGPRs <= 64)
1455 return 8;
1456 if (SGPRs <= 72)
1457 return 7;
1458 if (SGPRs <= 80)
1459 return 6;
1460 return 5;
1461}
1462
1464 unsigned DynamicVGPRBlockSize) {
1465 assert(WavesPerEU != 0);
1466
1468 if (WavesPerEU >= MaxWavesPerEU)
1469 return 0;
1470
1472 unsigned AddrsableNumVGPRs =
1475 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1476
1477 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1478 return 0;
1479
1481 DynamicVGPRBlockSize);
1482 if (WavesPerEU < MinWavesPerEU)
1483 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1484
1485 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1486 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1487 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1488}
1489
1491 unsigned DynamicVGPRBlockSize) {
1492 assert(WavesPerEU != 0);
1493
1494 unsigned MaxNumVGPRs =
1497 unsigned AddressableNumVGPRs =
1499 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1500}
1501
1503 std::optional EnableWavefrontSize32) {
1506 1;
1507}
1508
1510 unsigned NumVGPRs,
1511 unsigned DynamicVGPRBlockSize,
1512 std::optional EnableWavefrontSize32) {
1514 NumVGPRs,
1516}
1517}
1518
1532 } else {
1534 }
1535
1536
1537
1539
1540
1541
1545
1546 if (Version.Major >= 10) {
1550 }
1551}
1552
1556
1560
1566
1570
1572 return C == 'v' || C == 's' || C == 'a';
1573}
1574
1576 char Kind = RegName.front();
1578 return {};
1579
1581 if (RegName.consume_front("[")) {
1582 unsigned Idx, End;
1588 unsigned NumRegs = End - Idx + 1;
1589 if (NumRegs > 1)
1590 return {Kind, Idx, NumRegs};
1591 }
1592 } else {
1593 unsigned Idx;
1596 return {Kind, Idx, 1};
1597 }
1598
1599 return {};
1600}
1601
1602std::tuple<char, unsigned, unsigned>
1605 if (.consume_front("{") ||
.consume_back("}"))
1606 return {};
1608}
1609
1610std::pair<unsigned, unsigned>
1612 std::pair<unsigned, unsigned> Default,
1613 bool OnlyFirstRequired) {
1615 return {Attr->first, Attr->second.value_or(Default.second)};
1617}
1618
1619std::optional<std::pair<unsigned, std::optional>>
1621 bool OnlyFirstRequired) {
1623 if (.isStringAttribute())
1624 return std::nullopt;
1625
1627 std::pair<unsigned, std::optional> Ints;
1628 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1629 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1630 Ctx.emitError("can't parse first integer attribute " + Name);
1631 return std::nullopt;
1632 }
1633 unsigned Second = 0;
1634 if (Strs.second.trim().getAsInteger(0, Second)) {
1635 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1636 Ctx.emitError("can't parse second integer attribute " + Name);
1637 return std::nullopt;
1638 }
1639 } else {
1640 Ints.second = Second;
1641 }
1642
1643 return Ints;
1644}
1645
1647 unsigned Size,
1649 std::optional<SmallVector> R =
1652}
1653
1654std::optional<SmallVector>
1658
1660 if (.isValid())
1661 return std::nullopt;
1662 if (.isStringAttribute()) {
1663 Ctx.emitError(Name + " is not a string attribute");
1664 return std::nullopt;
1665 }
1666
1668
1670 unsigned i = 0;
1671 for (; !S.empty() && i < Size; i++) {
1672 std::pair<StringRef, StringRef> Strs = S.split(',');
1673 unsigned IntVal;
1674 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1675 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1676 Name);
1677 return std::nullopt;
1678 }
1679 Vals[i] = IntVal;
1680 S = Strs.second;
1681 }
1682
1684 Ctx.emitError("attribute " + Name +
1685 " has incorrect number of integers; expected " +
1687 return std::nullopt;
1688 }
1689 return Vals;
1690}
1691
1695 auto Low =
1699
1700
1701
1702
1704 if (Low.ule(Val) && High.ugt(Val))
1705 return true;
1706 } else {
1707 if (Low.uge(Val) && High.ult(Val))
1708 return true;
1709 }
1710 }
1711
1712 return false;
1713}
1714
1716 return (1 << (getVmcntBitWidthLo(Version.Major) +
1717 getVmcntBitWidthHi(Version.Major))) -
1718 1;
1719}
1720
1722 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1723}
1724
1726 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1727}
1728
1730 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1731}
1732
1734 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1735}
1736
1738 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1739}
1740
1742 return (1 << getDscntBitWidth(Version.Major)) - 1;
1743}
1744
1746 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1747}
1748
1750 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1751}
1752
1754 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1755}
1756
1758 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1759 getVmcntBitWidthLo(Version.Major));
1760 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1761 getExpcntBitWidth(Version.Major));
1762 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1763 getLgkmcntBitWidth(Version.Major));
1764 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1765 getVmcntBitWidthHi(Version.Major));
1766 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1767}
1768
1770 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1771 getVmcntBitWidthLo(Version.Major));
1772 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1773 getVmcntBitWidthHi(Version.Major));
1774 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1775}
1776
1778 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1779 getExpcntBitWidth(Version.Major));
1780}
1781
1783 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1784 getLgkmcntBitWidth(Version.Major));
1785}
1786
1788 unsigned &Expcnt, unsigned &Lgkmcnt) {
1792}
1793
1799 return Decoded;
1800}
1801
1803 unsigned Vmcnt) {
1805 getVmcntBitWidthLo(Version.Major));
1806 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1807 getVmcntBitShiftHi(Version.Major),
1808 getVmcntBitWidthHi(Version.Major));
1809}
1810
1812 unsigned Expcnt) {
1813 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1814 getExpcntBitWidth(Version.Major));
1815}
1816
1818 unsigned Lgkmcnt) {
1819 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1820 getLgkmcntBitWidth(Version.Major));
1821}
1822
1824 unsigned Expcnt, unsigned Lgkmcnt) {
1830}
1831
1835
1837 bool IsStore) {
1838 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1839 getDscntBitWidth(Version.Major));
1840 if (IsStore) {
1841 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1842 getStorecntBitWidth(Version.Major));
1843 return Dscnt | Storecnt;
1844 }
1845 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1846 getLoadcntBitWidth(Version.Major));
1847 return Dscnt | Loadcnt;
1848}
1849
1853 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1854 getLoadcntBitWidth(Version.Major));
1855 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1856 getDscntBitWidth(Version.Major));
1857 return Decoded;
1858}
1859
1863 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1864 getStorecntBitWidth(Version.Major));
1865 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1866 getDscntBitWidth(Version.Major));
1867 return Decoded;
1868}
1869
1871 unsigned Loadcnt) {
1872 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1873 getLoadcntBitWidth(Version.Major));
1874}
1875
1877 unsigned Storecnt) {
1878 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1879 getStorecntBitWidth(Version.Major));
1880}
1881
1883 unsigned Dscnt) {
1884 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1885 getDscntBitWidth(Version.Major));
1886}
1887
1889 unsigned Dscnt) {
1894}
1895
1899
1901 unsigned Storecnt, unsigned Dscnt) {
1906}
1907
1909 const Waitcnt &Decoded) {
1911}
1912
1913
1914
1915
1916
1920 unsigned Enc = 0;
1921 for (int Idx = 0; Idx < Size; ++Idx) {
1922 const auto &Op = Opr[Idx];
1923 if (Op.isSupported(STI))
1924 Enc |= Op.encode(Op.Default);
1925 }
1926 return Enc;
1927}
1928
1930 int Size, unsigned Code,
1931 bool &HasNonDefaultVal,
1933 unsigned UsedOprMask = 0;
1934 HasNonDefaultVal = false;
1935 for (int Idx = 0; Idx < Size; ++Idx) {
1936 const auto &Op = Opr[Idx];
1937 if (.isSupported(STI))
1938 continue;
1939 UsedOprMask |= Op.getMask();
1940 unsigned Val = Op.decode(Code);
1941 if (.isValid(Val))
1942 return false;
1943 HasNonDefaultVal |= (Val != Op.Default);
1944 }
1945 return (Code & ~UsedOprMask) == 0;
1946}
1947
1949 unsigned Code, int &Idx, StringRef &Name,
1950 unsigned &Val, bool &IsDefault,
1952 while (Idx < Size) {
1953 const auto &Op = Opr[Idx++];
1954 if (Op.isSupported(STI)) {
1955 Name = Op.Name;
1956 Val = Op.decode(Code);
1957 IsDefault = (Val == Op.Default);
1958 return true;
1959 }
1960 }
1961
1962 return false;
1963}
1964
1966 int64_t InputVal) {
1967 if (InputVal < 0 || InputVal > Op.Max)
1969 return Op.encode(InputVal);
1970}
1971
1973 const StringRef Name, int64_t InputVal,
1974 unsigned &UsedOprMask,
1977 for (int Idx = 0; Idx < Size; ++Idx) {
1978 const auto &Op = Opr[Idx];
1979 if (Op.Name == Name) {
1980 if (.isSupported(STI)) {
1982 continue;
1983 }
1984 auto OprMask = Op.getMask();
1985 if (OprMask & UsedOprMask)
1987 UsedOprMask |= OprMask;
1989 }
1990 }
1991 return InvalidId;
1992}
1993
1994
1995
1996
1997
1998namespace DepCtr {
1999
2001 static int Default = -1;
2005}
2006
2010 HasNonDefaultVal, STI);
2011}
2012
2016 IsDefault, STI);
2017}
2018
2024
2026 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2027}
2028
2030 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2031}
2032
2034 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2035}
2036
2038 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2039}
2040
2042 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2043}
2044
2046 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2047}
2048
2050 return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
2051}
2052
2054 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2055}
2056
2061
2063 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2064}
2065
2070
2072 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2073}
2074
2079
2081 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2082}
2083
2088
2090 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2091}
2092
2097
2099 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2100}
2101
2106
2108 return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
2109}
2110
2115
2116}
2117
2118
2119
2120
2121
2122namespace Exp {
2123
2129
2130
2140
2141
2144 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2145 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2146 Name = Val.Name;
2147 return true;
2148 }
2149 }
2150 return false;
2151}
2152
2154
2156 if (Val.MaxIndex == 0 && Name == Val.Name)
2157 return Val.Tgt;
2158
2159 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2160 StringRef Suffix = Name.drop_front(Val.Name.size());
2161
2162 unsigned Id;
2163 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2165
2166
2167 if (Suffix.size() > 1 && Suffix[0] == '0')
2169
2170 return Val.Tgt + Id;
2171 }
2172 }
2174}
2175
2177 switch (Id) {
2186 default:
2189 return true;
2190 }
2191}
2192
2193}
2194
2195
2196
2197
2198
2199namespace MTBUFFormat {
2200
2204 return Id;
2205 }
2207}
2208
2213
2221
2225 if (Name == lookupTable[Id])
2226 return Id;
2227 }
2229}
2230
2235
2237 unsigned Dfmt;
2238 unsigned Nfmt;
2241}
2242
2246
2250
2255
2260 return Id;
2261 }
2262 } else {
2265 return Id;
2266 }
2267 }
2269}
2270
2276
2280
2287 return Id;
2288 }
2289 } else {
2292 return Id;
2293 }
2294 }
2296}
2297
2301
2307
2308}
2309
2310
2311
2312
2313
2314namespace SendMsg {
2315
2319
2323
2325 bool Strict) {
2327
2328 if (!Strict)
2330
2333 return false;
2334
2336 }
2337
2339}
2340
2344
2345 if (!Strict)
2347
2349 switch (MsgId) {
2356 }
2357 }
2359}
2360
2366
2373
2378 OpId = 0;
2380 } else {
2383 }
2384}
2385
2389
2390}
2391
2392
2393
2394
2395
2397 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2398}
2399
2401
2402 return F.getFnAttributeAsParsedInteger(
2403 "amdgpu-color-export",
2405}
2406
2408 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2409}
2410
2413 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2414
2417
2418 return 0;
2419}
2420
2422 return STI.hasFeature(AMDGPU::FeatureXNACK);
2423}
2424
2426 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2427}
2428
2430 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2431 !STI.hasFeature(AMDGPU::FeatureR128A16);
2432}
2433
2435 return STI.hasFeature(AMDGPU::FeatureA16);
2436}
2437
2439 return STI.hasFeature(AMDGPU::FeatureG16);
2440}
2441
2443 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && (STI) &&
2445}
2446
2448 return STI.hasFeature(AMDGPU::FeatureGDS);
2449}
2450
2453 if (Version.Major == 10)
2454 return Version.Minor >= 3 ? 13 : 5;
2455 if (Version.Major == 11)
2456 return 5;
2457 if (Version.Major >= 12)
2458 return HasSampler ? 4 : 5;
2459 return 0;
2460}
2461
2464 return 32;
2465 return 16;
2466}
2467
2469 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2470}
2471
2473 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2474}
2475
2477 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2478}
2479
2481 return STI.hasFeature(AMDGPU::FeatureGFX9);
2482}
2483
2487
2491
2495
2499
2503
2505
2507 return STI.hasFeature(AMDGPU::FeatureGFX10);
2508}
2509
2513
2517
2519 return STI.hasFeature(AMDGPU::FeatureGFX11);
2520}
2521
2525
2529
2531
2533
2537
2543
2545
2549
2553
2555 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2556}
2557
2559 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2560}
2561
2563 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2564}
2565
2567 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2568}
2569
2573
2575 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2576}
2577
2579 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2580}
2581
2583 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2584}
2585
2587 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2588}
2589
2591 return STI.hasFeature(AMDGPU::FeatureVOPD);
2592}
2593
2595 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2596}
2597
2599 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2600}
2601
2603 int32_t ArgNumVGPR) {
2604 if (has90AInsts && ArgNumAGPR)
2605 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2606 return std::max(ArgNumVGPR, ArgNumAGPR);
2607}
2608
2610 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2611 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2612 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2613 Reg == AMDGPU::SCC;
2614}
2615
2619
// MAP_REG2REG: shared switch body for translating the register held in `Reg`.
// The expansion brings `namespace AMDGPU` into scope so register names can be
// written unqualified, then switches on `Reg.id()`. Any register without an
// entry below falls to `default` and is returned unchanged. Every other entry
// is produced by one of the CASE_* helper macros, so what a case returns is
// decided by whichever CASE_* definitions are active where this macro is
// expanded. The expansion site must therefore provide `Reg` and whatever the
// active CASE_* macros reference.
2620#define MAP_REG2REG \
2621 using namespace AMDGPU; \
2622 switch (Reg.id()) { \
2623 default: \
2624 return Reg; \
2625 CASE_CI_VI(FLAT_SCR) \
2626 CASE_CI_VI(FLAT_SCR_LO) \
2627 CASE_CI_VI(FLAT_SCR_HI) \
2628 CASE_VI_GFX9PLUS(TTMP0) \
2629 CASE_VI_GFX9PLUS(TTMP1) \
2630 CASE_VI_GFX9PLUS(TTMP2) \
2631 CASE_VI_GFX9PLUS(TTMP3) \
2632 CASE_VI_GFX9PLUS(TTMP4) \
2633 CASE_VI_GFX9PLUS(TTMP5) \
2634 CASE_VI_GFX9PLUS(TTMP6) \
2635 CASE_VI_GFX9PLUS(TTMP7) \
2636 CASE_VI_GFX9PLUS(TTMP8) \
2637 CASE_VI_GFX9PLUS(TTMP9) \
2638 CASE_VI_GFX9PLUS(TTMP10) \
2639 CASE_VI_GFX9PLUS(TTMP11) \
2640 CASE_VI_GFX9PLUS(TTMP12) \
2641 CASE_VI_GFX9PLUS(TTMP13) \
2642 CASE_VI_GFX9PLUS(TTMP14) \
2643 CASE_VI_GFX9PLUS(TTMP15) \
2644 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2645 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2646 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2647 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2648 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2649 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2650 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2651 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2652 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2653 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2654 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2655 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2656 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2657 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2658 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2659 CASE_VI_GFX9PLUS( \
2660 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2661 CASE_GFXPRE11_GFX11PLUS(M0) \
2662 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2663 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2664 }
2665
// CASE_CI_VI(node), forward direction: emits `case node:` returning the
// `_ci`-suffixed register when isCI(STI) holds and the `_vi`-suffixed one
// otherwise. Requires `STI` to be in scope at the expansion site.
// NOTE(review): the assert is emitted ahead of the `case` label. Where the
// macro follows a `return` (as it does inside MAP_REG2REG) the switch dispatch
// jumps straight to the label, so the assert does not appear to be reached -
// confirm whether it was meant to sit after the label.
2666#define CASE_CI_VI(node) \
2667 assert(!isSI(STI)); \
2668 case node: \
2669 return isCI(STI) ? node##_ci : node##_vi;
2670
// CASE_VI_GFX9PLUS(node), forward direction: emits `case node:` returning the
// `_gfx9plus`-suffixed register when isGFX9Plus(STI) holds and the
// `_vi`-suffixed one otherwise. Requires `STI` in scope at the expansion site.
2671#define CASE_VI_GFX9PLUS(node) \
2672 case node: \
2673 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2674
// CASE_GFXPRE11_GFX11PLUS(node), forward direction: emits `case node:`
// returning the `_gfx11plus`-suffixed register when isGFX11Plus(STI) holds and
// the `_gfxpre11`-suffixed one otherwise. Requires `STI` in scope at the
// expansion site.
2675#define CASE_GFXPRE11_GFX11PLUS(node) \
2676 case node: \
2677 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2678
// CASE_GFXPRE11_GFX11PLUS_TO(node, result), forward direction: like
// CASE_GFXPRE11_GFX11PLUS, but the case label is `node` while the returned
// register is built from `result` (`result##_gfx11plus` or
// `result##_gfxpre11`), so one register can be redirected onto another's
// suffixed variants. Requires `STI` in scope at the expansion site.
2679#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2680 case node: \
2681 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2682
2688
2689#undef CASE_CI_VI
2690#undef CASE_VI_GFX9PLUS
2691#undef CASE_GFXPRE11_GFX11PLUS
2692#undef CASE_GFXPRE11_GFX11PLUS_TO
2693
// CASE_CI_VI(node), reverse direction: both the `_ci`- and `_vi`-suffixed
// registers share one case body that returns the unsuffixed `node`.
2694#define CASE_CI_VI(node) \
2695 case node##_ci: \
2696 case node##_vi: \
2697 return node;
// CASE_VI_GFX9PLUS(node), reverse direction: both the `_vi`- and
// `_gfx9plus`-suffixed registers share one case body that returns the
// unsuffixed `node`.
2698#define CASE_VI_GFX9PLUS(node) \
2699 case node##_vi: \
2700 case node##_gfx9plus: \
2701 return node;
// CASE_GFXPRE11_GFX11PLUS(node), reverse direction: both the `_gfx11plus`- and
// `_gfxpre11`-suffixed registers share one case body that returns the
// unsuffixed `node`.
2702#define CASE_GFXPRE11_GFX11PLUS(node) \
2703 case node##_gfx11plus: \
2704 case node##_gfxpre11: \
2705 return node;
// CASE_GFXPRE11_GFX11PLUS_TO(node, result), reverse direction: expands to
// nothing, so a redirected entry contributes no case label here.
// NOTE(review): presumably because the suffixed `result` registers are already
// covered by their own CASE_GFXPRE11_GFX11PLUS entry - confirm.
2706#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2707
2709
2711 switch (Reg.id()) {
2712 case AMDGPU::SRC_SHARED_BASE_LO:
2713 case AMDGPU::SRC_SHARED_BASE:
2714 case AMDGPU::SRC_SHARED_LIMIT_LO:
2715 case AMDGPU::SRC_SHARED_LIMIT:
2716 case AMDGPU::SRC_PRIVATE_BASE_LO:
2717 case AMDGPU::SRC_PRIVATE_BASE:
2718 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2719 case AMDGPU::SRC_PRIVATE_LIMIT:
2720 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2721 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2722 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2723 return true;
2724 case AMDGPU::SRC_VCCZ:
2725 case AMDGPU::SRC_EXECZ:
2726 case AMDGPU::SRC_SCC:
2727 return true;
2728 case AMDGPU::SGPR_NULL:
2729 return true;
2730 default:
2731 return false;
2732 }
2733}
2734
2735#undef CASE_CI_VI
2736#undef CASE_VI_GFX9PLUS
2737#undef CASE_GFXPRE11_GFX11PLUS
2738#undef CASE_GFXPRE11_GFX11PLUS_TO
2739#undef MAP_REG2REG
2740
2743 unsigned OpType = Desc.operands()[OpNo].OperandType;
2746}
2747
2750 unsigned OpType = Desc.operands()[OpNo].OperandType;
2751 switch (OpType) {
2764 return true;
2765 default:
2766 return false;
2767 }
2768}
2769
2772 unsigned OpType = Desc.operands()[OpNo].OperandType;
2777}
2778
2779
2780
2782 switch (RCID) {
2783 case AMDGPU::VGPR_16RegClassID:
2784 case AMDGPU::VGPR_16_Lo128RegClassID:
2785 case AMDGPU::SGPR_LO16RegClassID:
2786 case AMDGPU::AGPR_LO16RegClassID:
2787 return 16;
2788 case AMDGPU::SGPR_32RegClassID:
2789 case AMDGPU::VGPR_32RegClassID:
2790 case AMDGPU::VGPR_32_Lo256RegClassID:
2791 case AMDGPU::VRegOrLds_32RegClassID:
2792 case AMDGPU::AGPR_32RegClassID:
2793 case AMDGPU::VS_32RegClassID:
2794 case AMDGPU::AV_32RegClassID:
2795 case AMDGPU::SReg_32RegClassID:
2796 case AMDGPU::SReg_32_XM0RegClassID:
2797 case AMDGPU::SRegOrLds_32RegClassID:
2798 return 32;
2799 case AMDGPU::SGPR_64RegClassID:
2800 case AMDGPU::VS_64RegClassID:
2801 case AMDGPU::SReg_64RegClassID:
2802 case AMDGPU::VReg_64RegClassID:
2803 case AMDGPU::AReg_64RegClassID:
2804 case AMDGPU::SReg_64_XEXECRegClassID:
2805 case AMDGPU::VReg_64_Align2RegClassID:
2806 case AMDGPU::AReg_64_Align2RegClassID:
2807 case AMDGPU::AV_64RegClassID:
2808 case AMDGPU::AV_64_Align2RegClassID:
2809 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2810 case AMDGPU::VS_64_Lo256RegClassID:
2811 return 64;
2812 case AMDGPU::SGPR_96RegClassID:
2813 case AMDGPU::SReg_96RegClassID:
2814 case AMDGPU::VReg_96RegClassID:
2815 case AMDGPU::AReg_96RegClassID:
2816 case AMDGPU::VReg_96_Align2RegClassID:
2817 case AMDGPU::AReg_96_Align2RegClassID:
2818 case AMDGPU::AV_96RegClassID:
2819 case AMDGPU::AV_96_Align2RegClassID:
2820 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2821 return 96;
2822 case AMDGPU::SGPR_128RegClassID:
2823 case AMDGPU::SReg_128RegClassID:
2824 case AMDGPU::VReg_128RegClassID:
2825 case AMDGPU::AReg_128RegClassID:
2826 case AMDGPU::VReg_128_Align2RegClassID:
2827 case AMDGPU::AReg_128_Align2RegClassID:
2828 case AMDGPU::AV_128RegClassID:
2829 case AMDGPU::AV_128_Align2RegClassID:
2830 case AMDGPU::SReg_128_XNULLRegClassID:
2831 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2832 return 128;
2833 case AMDGPU::SGPR_160RegClassID:
2834 case AMDGPU::SReg_160RegClassID:
2835 case AMDGPU::VReg_160RegClassID:
2836 case AMDGPU::AReg_160RegClassID:
2837 case AMDGPU::VReg_160_Align2RegClassID:
2838 case AMDGPU::AReg_160_Align2RegClassID:
2839 case AMDGPU::AV_160RegClassID:
2840 case AMDGPU::AV_160_Align2RegClassID:
2841 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2842 return 160;
2843 case AMDGPU::SGPR_192RegClassID:
2844 case AMDGPU::SReg_192RegClassID:
2845 case AMDGPU::VReg_192RegClassID:
2846 case AMDGPU::AReg_192RegClassID:
2847 case AMDGPU::VReg_192_Align2RegClassID:
2848 case AMDGPU::AReg_192_Align2RegClassID:
2849 case AMDGPU::AV_192RegClassID:
2850 case AMDGPU::AV_192_Align2RegClassID:
2851 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
2852 return 192;
2853 case AMDGPU::SGPR_224RegClassID:
2854 case AMDGPU::SReg_224RegClassID:
2855 case AMDGPU::VReg_224RegClassID:
2856 case AMDGPU::AReg_224RegClassID:
2857 case AMDGPU::VReg_224_Align2RegClassID:
2858 case AMDGPU::AReg_224_Align2RegClassID:
2859 case AMDGPU::AV_224RegClassID:
2860 case AMDGPU::AV_224_Align2RegClassID:
2861 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
2862 return 224;
2863 case AMDGPU::SGPR_256RegClassID:
2864 case AMDGPU::SReg_256RegClassID:
2865 case AMDGPU::VReg_256RegClassID:
2866 case AMDGPU::AReg_256RegClassID:
2867 case AMDGPU::VReg_256_Align2RegClassID:
2868 case AMDGPU::AReg_256_Align2RegClassID:
2869 case AMDGPU::AV_256RegClassID:
2870 case AMDGPU::AV_256_Align2RegClassID:
2871 case AMDGPU::SReg_256_XNULLRegClassID:
2872 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
2873 return 256;
2874 case AMDGPU::SGPR_288RegClassID:
2875 case AMDGPU::SReg_288RegClassID:
2876 case AMDGPU::VReg_288RegClassID:
2877 case AMDGPU::AReg_288RegClassID:
2878 case AMDGPU::VReg_288_Align2RegClassID:
2879 case AMDGPU::AReg_288_Align2RegClassID:
2880 case AMDGPU::AV_288RegClassID:
2881 case AMDGPU::AV_288_Align2RegClassID:
2882 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
2883 return 288;
2884 case AMDGPU::SGPR_320RegClassID:
2885 case AMDGPU::SReg_320RegClassID:
2886 case AMDGPU::VReg_320RegClassID:
2887 case AMDGPU::AReg_320RegClassID:
2888 case AMDGPU::VReg_320_Align2RegClassID:
2889 case AMDGPU::AReg_320_Align2RegClassID:
2890 case AMDGPU::AV_320RegClassID:
2891 case AMDGPU::AV_320_Align2RegClassID:
2892 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
2893 return 320;
2894 case AMDGPU::SGPR_352RegClassID:
2895 case AMDGPU::SReg_352RegClassID:
2896 case AMDGPU::VReg_352RegClassID:
2897 case AMDGPU::AReg_352RegClassID:
2898 case AMDGPU::VReg_352_Align2RegClassID:
2899 case AMDGPU::AReg_352_Align2RegClassID:
2900 case AMDGPU::AV_352RegClassID:
2901 case AMDGPU::AV_352_Align2RegClassID:
2902 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
2903 return 352;
2904 case AMDGPU::SGPR_384RegClassID:
2905 case AMDGPU::SReg_384RegClassID:
2906 case AMDGPU::VReg_384RegClassID:
2907 case AMDGPU::AReg_384RegClassID:
2908 case AMDGPU::VReg_384_Align2RegClassID:
2909 case AMDGPU::AReg_384_Align2RegClassID:
2910 case AMDGPU::AV_384RegClassID:
2911 case AMDGPU::AV_384_Align2RegClassID:
2912 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
2913 return 384;
2914 case AMDGPU::SGPR_512RegClassID:
2915 case AMDGPU::SReg_512RegClassID:
2916 case AMDGPU::VReg_512RegClassID:
2917 case AMDGPU::AReg_512RegClassID:
2918 case AMDGPU::VReg_512_Align2RegClassID:
2919 case AMDGPU::AReg_512_Align2RegClassID:
2920 case AMDGPU::AV_512RegClassID:
2921 case AMDGPU::AV_512_Align2RegClassID:
2922 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
2923 return 512;
2924 case AMDGPU::SGPR_1024RegClassID:
2925 case AMDGPU::SReg_1024RegClassID:
2926 case AMDGPU::VReg_1024RegClassID:
2927 case AMDGPU::AReg_1024RegClassID:
2928 case AMDGPU::VReg_1024_Align2RegClassID:
2929 case AMDGPU::AReg_1024_Align2RegClassID:
2930 case AMDGPU::AV_1024RegClassID:
2931 case AMDGPU::AV_1024_Align2RegClassID:
2932 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
2933 return 1024;
2934 default:
2936 }
2937}
2938
2942
2945 return true;
2946
2957 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2958}
2959
2962 return true;
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2983 (Val == 0x3e22f983 && HasInv2Pi);
2984}
2985
2987 if (!HasInv2Pi)
2988 return false;
2990 return true;
2992 return Val == 0x3F00 ||
2993 Val == 0xBF00 ||
2994 Val == 0x3F80 ||
2995 Val == 0xBF80 ||
2996 Val == 0x4000 ||
2997 Val == 0xC000 ||
2998 Val == 0x4080 ||
2999 Val == 0xC080 ||
3000 Val == 0x3E22;
3001}
3002
3006
3008 if (!HasInv2Pi)
3009 return false;
3011 return true;
3013 return Val == 0x3C00 ||
3014 Val == 0xBC00 ||
3015 Val == 0x3800 ||
3016 Val == 0xB800 ||
3017 Val == 0x4000 ||
3018 Val == 0xC000 ||
3019 Val == 0x4400 ||
3020 Val == 0xC400 ||
3021 Val == 0x3118;
3022}
3023
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3037 return 128 + Signed;
3038
3040 return 192 + std::abs(Signed);
3041
3042 if (IsFloat) {
3043
3045 case 0x3800: return 240;
3046 case 0xB800: return 241;
3047 case 0x3C00: return 242;
3048 case 0xBC00: return 243;
3049 case 0x4000: return 244;
3050 case 0xC000: return 245;
3051 case 0x4400: return 246;
3052 case 0xC400: return 247;
3053 case 0x3118: return 248;
3054 default: break;
3055 }
3056
3057 } else {
3058
3060 case 0x3F000000: return 240;
3061 case 0xBF000000: return 241;
3062 case 0x3F800000: return 242;
3063 case 0xBF800000: return 243;
3064 case 0x40000000: return 244;
3065 case 0xC0000000: return 245;
3066 case 0x40800000: return 246;
3067 case 0xC0800000: return 247;
3068 case 0x3E22F983: return 248;
3069 default: break;
3070 }
3071
3072 }
3073
3074 return {};
3075}
3076
3077
3078
3082
3083
3084
3088 return 128 + Signed;
3089
3091 return 192 + std::abs(Signed);
3092
3093
3095 case 0x3F00: return 240;
3096 case 0xBF00: return 241;
3097 case 0x3F80: return 242;
3098 case 0xBF80: return 243;
3099 case 0x4000: return 244;
3100 case 0xC000: return 245;
3101 case 0x4080: return 246;
3102 case 0xC080: return 247;
3103 case 0x3E22: return 248;
3104 default: break;
3105 }
3106
3107
3108 return std::nullopt;
3109}
3110
3111
3112
3116
3117
3119 switch (OpType) {
3130 return false;
3131 default:
3133 }
3134}
3135
3136
3140
3141
3145
3146
3150
3152 if (IsFP64)
3153 return (Val);
3154
3156}
3157
3159 switch (Type) {
3160 default:
3161 break;
3166 return Imm & 0xffff;
3182 }
3183 return Imm;
3184}
3185
3188
3189
3191 switch (CC) {
3194 return true;
3205
3206
3207 return A->hasAttribute(Attribute::InReg) ||
3208 A->hasAttribute(Attribute::ByVal);
3209 default:
3210
3211 return A->hasAttribute(Attribute::InReg);
3212 }
3213}
3214
3216
3218 switch (CC) {
3221 return true;
3232
3233
3234 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3236 default:
3237 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3238 }
3239}
3240
3244
3246 int64_t EncodedOffset) {
3249
3252}
3253
3255 int64_t EncodedOffset, bool IsBuffer) {
3257 if (IsBuffer && EncodedOffset < 0)
3258 return false;
3259 return isInt<24>(EncodedOffset);
3260 }
3261
3263}
3264
3266 return (ByteOffset & 3) == 0;
3267}
3268
3272 return ByteOffset;
3273
3275 return ByteOffset >> 2;
3276}
3277
3279 int64_t ByteOffset, bool IsBuffer,
3280 bool HasSOffset) {
3281
3282
3283
3285 return std::nullopt;
3286
3287 if (isGFX12Plus(ST))
3288 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3289 : std::nullopt;
3290
3291
3294 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3295 : std::nullopt;
3296 }
3297
3299 return std::nullopt;
3300
3303 ? std::optional<int64_t>(EncodedOffset)
3304 : std::nullopt;
3305}
3306
3308 int64_t ByteOffset) {
3310 return std::nullopt;
3311
3313 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3314 : std::nullopt;
3315}
3316
3319 return 12;
3320
3322 return 24;
3323 return 13;
3324}
3325
3326namespace {
3327
// Record type holding a single intrinsic ID (`Intr`).
// NOTE(review): presumably the row type of the table emitted under
// GET_SourcesOfDivergence_IMPL by AMDGPUGenSearchableTables.inc - confirm.
3328struct SourceOfDivergence {
3329 unsigned Intr;
3330};
// Looks up the record for intrinsic `Intr`. Only declared here; no definition
// is visible in this file, so it is presumably supplied by the generated
// include - confirm. Callers in this file use the returned pointer as a
// yes/no result.
3331const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3332
3334 unsigned Intr;
3335};
3336const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3337
3338#define GET_SourcesOfDivergence_IMPL
3339#define GET_UniformIntrinsics_IMPL
3340#define GET_Gfx9BufferFormat_IMPL
3341#define GET_Gfx10BufferFormat_IMPL
3342#define GET_Gfx11PlusBufferFormat_IMPL
3343
3344#include "AMDGPUGenSearchableTables.inc"
3345
3346}
3347
3349 return lookupSourceOfDivergence(IntrID);
3350}
3351
3353 return lookupAlwaysUniform(IntrID);
3354}
3355
3360 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3361 BitsPerComp, NumComponents, NumFormat)
3363 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3364 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3365}
3366
3370 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3371 : getGfx9BufferFormatInfo(Format);
3372}
3373
3376 const unsigned VGPRClasses[] = {
3377 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3378 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3379 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3380 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3381 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3382 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3383 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3384 AMDGPU::VReg_1024RegClassID};
3385
3386 for (unsigned RCID : VGPRClasses) {
3389 return &RC;
3390 }
3391
3392 return nullptr;
3393}
3394
3396 unsigned Enc = MRI.getEncodingValue(Reg);
3398 return Idx >> 8;
3399}
3400
3403 unsigned Enc = MRI.getEncodingValue(Reg);
3405 if (Idx >= 0x100)
3407
3409 if (!RC)
3411
3412 Idx |= MSBs << 8;
3413 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3414
3415 Idx *= 2;
3417 ++Idx;
3418 }
3419
3421}
3422
3423std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3425 static const AMDGPU::OpName VOPOps[4] = {
3426 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3427 AMDGPU::OpName::vdst};
3428 static const AMDGPU::OpName VDSOps[4] = {
3429 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3430 AMDGPU::OpName::vdst};
3431 static const AMDGPU::OpName FLATOps[4] = {
3432 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3433 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3434 static const AMDGPU::OpName BUFOps[4] = {
3435 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3436 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3437 static const AMDGPU::OpName VIMGOps[4] = {
3438 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3439 AMDGPU::OpName::vdata};
3440
3441
3442
3443
3444 static const AMDGPU::OpName VOPDOpsX[4] = {
3445 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3446 AMDGPU::OpName::vdstX};
3447 static const AMDGPU::OpName VOPDOpsY[4] = {
3448 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3449 AMDGPU::OpName::vdstY};
3450
3451
3452 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3453 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3454 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3455 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3456 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3457 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3458 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3459 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3460 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3461
3462 unsigned TSFlags = Desc.TSFlags;
3463
3464 if (TSFlags &
3467 switch (Desc.getOpcode()) {
3468
3469 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3470 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3471 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3472 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3473 return {};
3474 case AMDGPU::V_FMAMK_F16:
3475 case AMDGPU::V_FMAMK_F16_t16:
3476 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3477 case AMDGPU::V_FMAMK_F16_fake16:
3478 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3479 case AMDGPU::V_FMAMK_F32:
3480 case AMDGPU::V_FMAMK_F32_gfx12:
3481 case AMDGPU::V_FMAMK_F64:
3482 case AMDGPU::V_FMAMK_F64_gfx1250:
3483 return {VOP2MADMKOps, nullptr};
3484 default:
3485 break;
3486 }
3487 return {VOPOps, nullptr};
3488 }
3489
3491 return {VDSOps, nullptr};
3492
3494 return {FLATOps, nullptr};
3495
3497 return {BUFOps, nullptr};
3498
3500 return {VIMGOps, nullptr};
3501
3504 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3505 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3506 }
3507
3509
3511 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3512 " these instructions are not expected on gfx1250");
3513
3514 return {};
3515}
3516
3519
3523 return false;
3524
3525
3528
3529
3532
3533 return false;
3534}
3535
3538 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3539 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3540 if (Idx == -1)
3541 continue;
3542
3546 if (RegClass == AMDGPU::VReg_64RegClassID ||
3547 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3548 return true;
3549 }
3550
3551 return false;
3552}
3553
3555 switch (Opc) {
3556 case AMDGPU::V_MUL_LO_U32_e64:
3557 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3558 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3559 case AMDGPU::V_MUL_HI_U32_e64:
3560 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3561 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3562 case AMDGPU::V_MUL_HI_I32_e64:
3563 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3564 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3565 case AMDGPU::V_MAD_U32_e64:
3566 case AMDGPU::V_MAD_U32_e64_dpp:
3567 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3568 return true;
3569 default:
3570 return false;
3571 }
3572}
3573
3576 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3577 return false;
3578
3580 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3581
3583}
3584
3586 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3587 return 64;
3588 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3589 return 128;
3590 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3591 return 320;
3592 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3593 return 512;
3594 return 64;
3595}
3596
3598 switch (Opc) {
3599 case AMDGPU::V_PK_ADD_F32:
3600 case AMDGPU::V_PK_ADD_F32_gfx12:
3601 case AMDGPU::V_PK_MUL_F32:
3602 case AMDGPU::V_PK_MUL_F32_gfx12:
3603 case AMDGPU::V_PK_FMA_F32:
3604 case AMDGPU::V_PK_FMA_F32_gfx12:
3605 return true;
3606 default:
3607 return false;
3608 }
3609}
3610
3615
3619
3622 return "";
3624 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3625 return Buffer.c_str();
3626 }
3628 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3629 << EncoVariableDims;
3630 return Buffer.c_str();
3631 }
3633 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3634 return Buffer.c_str();
3635 }
3636 }
3638}
3639
3641 std::optional<SmallVector> Attr =
3644
3645 if (!Attr.has_value())
3647 else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; }))
3649 else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; }))
3651
3654 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3655
3656 return A;
3657}
3658
3659}
3660
3663 switch (S) {
3665 OS << "Unsupported";
3666 break;
3668 OS << "Any";
3669 break;
3671 OS << "Off";
3672 break;
3674 OS << "On";
3675 break;
3676 }
3677 return OS;
3678}
3679
3680}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Definition AMDGPUBaseInfo.cpp:2620
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Register const TargetRegisterInfo * TRI
#define S_00B848_MEM_ORDERED(x)
#define S_00B848_WGP_MODE(x)
#define S_00B848_FWD_PROGRESS(x)
unsigned unsigned DefaultVal
static const int BlockSize
static ClusterDimsAttr get(const Function &F)
Definition AMDGPUBaseInfo.cpp:3640
ClusterDimsAttr()=default
std::string to_string() const
Definition AMDGPUBaseInfo.cpp:3616
const std::array< unsigned, 3 > & getDims() const
Definition AMDGPUBaseInfo.cpp:3611
bool isSramEccSupported() const
void setTargetIDFromFeaturesString(StringRef FS)
Definition AMDGPUBaseInfo.cpp:1022
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1013
bool isXnackSupported() const
void setTargetIDFromTargetIDStream(StringRef TargetID)
Definition AMDGPUBaseInfo.cpp:1090
std::string toString() const
Definition AMDGPUBaseInfo.cpp:1102
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
Definition AMDGPUBaseInfo.cpp:887
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
int getBitOp3OperandIdx() const
Definition AMDGPUBaseInfo.cpp:883
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
Definition AMDGPUBaseInfo.cpp:901
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
This class represents an incoming formal argument to a Function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
This holds information about one operand of a machine instruction, indicating the register class for ...
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
constexpr unsigned id() const
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
A Module instance is used to store all the information related to an LLVM module.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2041
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
Definition AMDGPUBaseInfo.cpp:2089
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt)
Definition AMDGPUBaseInfo.cpp:2107
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2013
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
Definition AMDGPUBaseInfo.cpp:2098
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
Definition AMDGPUBaseInfo.cpp:2062
unsigned decodeFieldSaSdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2033
unsigned decodeFieldVaSdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2037
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
Definition AMDGPUBaseInfo.cpp:2053
unsigned decodeFieldVaSsrc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2045
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2019
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
Definition AMDGPUBaseInfo.cpp:2071
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2007
unsigned decodeFieldVaVdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2029
unsigned decodeFieldHoldCnt(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2049
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2000
unsigned decodeFieldVmVsrc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2025
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
Definition AMDGPUBaseInfo.cpp:2080
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2176
static constexpr ExpTgt ExpTgtInfo[]
Definition AMDGPUBaseInfo.cpp:2131
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
Definition AMDGPUBaseInfo.cpp:2142
unsigned getTgtId(const StringRef Name)
Definition AMDGPUBaseInfo.cpp:2153
@ ET_DUAL_SRC_BLEND_MAX_IDX
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
@ COMPLETION_ACTION_OFFSET
@ MULTIGRID_SYNC_ARG_OFFSET
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1372
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1389
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
Definition AMDGPUBaseInfo.cpp:1387
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1226
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1143
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1419
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1196
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1234
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1217
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1239
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition AMDGPUBaseInfo.cpp:1308
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1253
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1152
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1164
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1176
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1262
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition AMDGPUBaseInfo.cpp:1274
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
Definition AMDGPUBaseInfo.cpp:1081
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1232
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1350
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition AMDGPUBaseInfo.cpp:1291
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
Definition AMDGPUBaseInfo.cpp:1344
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1215
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1490
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1244
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1463
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1509
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1502
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
Definition AMDGPUBaseInfo.cpp:1436
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
Definition AMDGPUBaseInfo.cpp:1339
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1400
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1255
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1407
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2277
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2302
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2271
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2214
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2243
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2236
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2281
StringRef getDfmtName(unsigned Id)
Definition AMDGPUBaseInfo.cpp:2209
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
Definition AMDGPUBaseInfo.cpp:2247
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2256
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2298
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2231
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2222
int64_t getDfmt(const StringRef Name)
Definition AMDGPUBaseInfo.cpp:2201
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
Definition AMDGPUBaseInfo.cpp:2251
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
Definition AMDGPUBaseInfo.cpp:2386
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2367
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2374
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2320
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
Definition AMDGPUBaseInfo.cpp:2341
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2316
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2361
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
Definition AMDGPUBaseInfo.cpp:2324
Definition AMDGPUBaseInfo.cpp:835
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:3597
bool isGCN3Encoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2554
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
Definition AMDGPUBaseInfo.cpp:2986
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2562
bool isInlineValue(MCRegister Reg)
Definition AMDGPUBaseInfo.cpp:2710
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2510
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
Definition AMDGPUBaseInfo.cpp:3118
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
Definition AMDGPUBaseInfo.cpp:1787
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
Definition AMDGPUBaseInfo.cpp:3007
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
Definition AMDGPUBaseInfo.cpp:2609
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition AMDGPUBaseInfo.cpp:3269
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
Definition AMDGPUBaseInfo.cpp:1876
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
Definition AMDGPUBaseInfo.cpp:2683
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:3241
bool isVOPCAsmOnly(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:561
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
Definition AMDGPUBaseInfo.cpp:303
bool getMTBUFHasSrsrc(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:486
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition AMDGPUBaseInfo.cpp:3307
bool getWMMAIsXDL(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:573
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
Definition AMDGPUBaseInfo.cpp:602
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1929
bool isGFX10Before1030(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2550
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
Definition AMDGPUBaseInfo.cpp:2770
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:785
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
Definition AMDGPUBaseInfo.cpp:1567
bool isInlinableLiteralV2I16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3137
int getMTBUFElements(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:476
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
Definition AMDGPUBaseInfo.cpp:2616
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
Definition AMDGPUBaseInfo.cpp:1965
unsigned getTemporalHintType(const MCInstrDesc TID)
Definition AMDGPUBaseInfo.cpp:753
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
Definition AMDGPUBaseInfo.cpp:2602
bool isGFX10(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2506
bool isInlinableLiteralV2BF16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3142
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2462
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3024
FPType getFPDstSelType(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:773
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
Definition AMDGPUBaseInfo.cpp:3317
bool hasA16(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2434
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
Definition AMDGPUBaseInfo.cpp:3254
bool isGFX12Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2530
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
Definition AMDGPUBaseInfo.cpp:2451
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
Definition AMDGPUBaseInfo.cpp:3374
bool hasPackedD16(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2442
unsigned getStorecntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1753
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:3585
bool isGFX940(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2578
bool isInlinableLiteralV2F16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3147
bool isHsaAbi(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:198
bool isGFX11(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2518
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:537
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2570
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
Definition AMDGPUBaseInfo.cpp:1692
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
Definition AMDGPUBaseInfo.cpp:578
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
Definition AMDGPUBaseInfo.cpp:653
bool isGroupSegment(const GlobalValue *GV)
Definition AMDGPUBaseInfo.cpp:1553
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:491
bool hasXNACK(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2421
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
Definition AMDGPUBaseInfo.cpp:3151
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
Definition AMDGPUBaseInfo.cpp:1836
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
Definition AMDGPUBaseInfo.cpp:635
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
Definition AMDGPUBaseInfo.cpp:1823
bool isVOPC64DPP(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:557
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
Definition AMDGPUBaseInfo.cpp:501
bool getMAIIsGFX940XDL(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:568
bool isSI(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2468
unsigned getDefaultAMDHSACodeObjectVersion()
Definition AMDGPUBaseInfo.cpp:211
bool isReadOnlySegment(const GlobalValue *GV)
Definition AMDGPUBaseInfo.cpp:1561
bool isArgPassedInSGPR(const Argument *A)
Definition AMDGPUBaseInfo.cpp:3186
bool isIntrinsicAlwaysUniform(unsigned IntrID)
Definition AMDGPUBaseInfo.cpp:3352
int getMUBUFBaseOpcode(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:496
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition AMDGPUBaseInfo.cpp:202
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition AMDGPUBaseInfo.cpp:1782
unsigned getWaitcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1757
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:552
bool isGFX9(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2480
bool isDPALU_DPP32BitOpc(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:3554
bool getVOP1IsSingle(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:542
static bool isDwordAligned(uint64_t ByteOffset)
Definition AMDGPUBaseInfo.cpp:3265
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:625
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2558
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
Definition AMDGPUBaseInfo.cpp:2741
bool getHasColorExport(const Function &F)
Definition AMDGPUBaseInfo.cpp:2400
int getMTBUFBaseOpcode(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:465
bool isGFX90A(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2574
unsigned getSamplecntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1725
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
Definition AMDGPUBaseInfo.cpp:269
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
Definition AMDGPUBaseInfo.cpp:1575
bool hasSRAMECC(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2425
bool getHasDepthExport(const Function &F)
Definition AMDGPUBaseInfo.cpp:2407
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2492
bool getMUBUFHasVAddr(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:512
bool isTrue16Inst(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:768
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
Definition AMDGPUBaseInfo.cpp:3395
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
Definition AMDGPUBaseInfo.cpp:826
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition AMDGPUBaseInfo.cpp:2960
bool isGFX12(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2526
unsigned getInitialPSInputAddr(const Function &F)
Definition AMDGPUBaseInfo.cpp:2396
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
Definition AMDGPUBaseInfo.cpp:1811
bool isAsyncStore(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:737
unsigned getDynamicVGPRBlockSize(const Function &F)
Definition AMDGPUBaseInfo.cpp:2411
unsigned getKmcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1745
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
Definition AMDGPUBaseInfo.cpp:3401
unsigned getVmcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1715
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2546
bool hasMAIInsts(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2586
unsigned getBitOp2(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:802
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
Definition AMDGPUBaseInfo.cpp:3348
unsigned getXcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1749
bool isGenericAtomic(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:715
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
Definition AMDGPUBaseInfo.cpp:617
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
Definition AMDGPUBaseInfo.cpp:1860
bool isGFX8Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2496
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1737
bool getMUBUFTfe(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:532
unsigned getBvhcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1729
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:193
bool hasMIMG_R128(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2429
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2566
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
Definition AMDGPUBaseInfo.cpp:3424
bool hasG16(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2438
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
Definition AMDGPUBaseInfo.cpp:323
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
Definition AMDGPUBaseInfo.cpp:470
unsigned getExpcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1733
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2582
bool getMUBUFHasSoffset(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:522
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2544
bool isGFX11Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2522
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3113
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
Definition AMDGPUBaseInfo.cpp:2748
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
Definition AMDGPUBaseInfo.cpp:1603
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
Definition AMDGPUBaseInfo.cpp:258
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1917
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
Definition AMDGPUBaseInfo.cpp:1870
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2514
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1948
static bool isValidRegPrefix(char C)
Definition AMDGPUBaseInfo.cpp:1571
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
Definition AMDGPUBaseInfo.cpp:3278
bool isGlobalSegment(const GlobalValue *GV)
Definition AMDGPUBaseInfo.cpp:1557
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
Definition AMDGPUBaseInfo.cpp:3158
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_LAST
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FIRST
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_AC_LAST
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1519
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2504
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:3574
bool hasGDS(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2447
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
Definition AMDGPUBaseInfo.cpp:3245
bool isGFX9Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2500
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2594
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:660
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
Definition AMDGPUBaseInfo.cpp:996
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
Definition AMDGPUBaseInfo.cpp:1802
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition AMDGPUBaseInfo.cpp:1777
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:702
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
Definition AMDGPUBaseInfo.cpp:1850
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3079
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
Definition AMDGPUBaseInfo.cpp:1900
bool isGFX1250(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2534
int getMCOpcode(uint16_t Opcode, unsigned Gen)
Definition AMDGPUBaseInfo.cpp:798
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:310
bool isVI(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2476
bool isTensorStore(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:748
bool getMUBUFIsBufferInv(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:527
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
Definition AMDGPUBaseInfo.cpp:3517
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
Definition AMDGPUBaseInfo.cpp:2708
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
Definition AMDGPUBaseInfo.cpp:3085
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1972
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2598
bool supportsWGP(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2538
bool isMAC(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:664
bool isCI(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2472
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
Definition AMDGPUBaseInfo.cpp:1817
bool getVOP2IsSingle(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:547
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
Definition AMDGPUBaseInfo.cpp:563
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
Definition AMDGPUBaseInfo.cpp:280
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
Definition AMDGPUBaseInfo.cpp:1646
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
Definition AMDGPUBaseInfo.cpp:315
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2532
bool getMTBUFHasVAddr(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:481
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
Definition AMDGPUBaseInfo.cpp:1769
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
Definition AMDGPUBaseInfo.cpp:228
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition AMDGPUBaseInfo.cpp:1611
unsigned getLoadcntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1721
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
Definition AMDGPUBaseInfo.cpp:3003
bool hasVOPD(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2590
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
Definition AMDGPUBaseInfo.cpp:817
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
Definition AMDGPUBaseInfo.cpp:1882
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
Definition AMDGPUBaseInfo.cpp:2943
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
Definition AMDGPUBaseInfo.cpp:594
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
Definition AMDGPUBaseInfo.cpp:245
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2488
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2484
int getMUBUFElements(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:507
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
Definition AMDGPUBaseInfo.cpp:1888
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:3356
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:790
bool isPermlane16(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:691
bool getMUBUFHasSrsrc(unsigned Opc)
Definition AMDGPUBaseInfo.cpp:517
unsigned getDscntBitMask(const IsaVersion &Version)
Definition AMDGPUBaseInfo.cpp:1741
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:3536
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ ELFABIVERSION_AMDGPU_HSA_V4
@ ELFABIVERSION_AMDGPU_HSA_V5
@ ELFABIVERSION_AMDGPU_HSA_V6
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
std::string utostr(uint64_t X, bool isNeg=false)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
To bit_cast(const From &from) noexcept
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
AMD Kernel Code Object (amd_kernel_code_t).
uint16_t amd_machine_version_major
uint16_t amd_machine_kind
uint16_t amd_machine_version_stepping
uint8_t private_segment_alignment
int64_t kernel_code_entry_byte_offset
uint32_t amd_kernel_code_version_major
uint16_t amd_machine_version_minor
uint8_t group_segment_alignment
uint8_t kernarg_segment_alignment
uint32_t amd_kernel_code_version_minor
uint64_t compute_pgm_resource_registers
Definition AMDGPUBaseInfo.cpp:416
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:417
bool IsDPMACCInstruction
Definition AMDGPUBaseInfo.cpp:418
unsigned Tgt
Definition AMDGPUBaseInfo.cpp:2126
StringLiteral Name
Definition AMDGPUBaseInfo.cpp:2125
unsigned MaxIndex
Definition AMDGPUBaseInfo.cpp:2127
Definition AMDGPUBaseInfo.cpp:421
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:422
bool HasFP8DstByteSel
Definition AMDGPUBaseInfo.cpp:423
bool HasFP4DstByteSel
Definition AMDGPUBaseInfo.cpp:424
Instruction set architecture version.
Definition AMDGPUBaseInfo.cpp:362
uint16_t BaseOpcode
Definition AMDGPUBaseInfo.cpp:364
bool has_srsrc
Definition AMDGPUBaseInfo.cpp:367
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:363
bool has_vaddr
Definition AMDGPUBaseInfo.cpp:366
uint8_t elements
Definition AMDGPUBaseInfo.cpp:365
bool has_soffset
Definition AMDGPUBaseInfo.cpp:368
Definition AMDGPUBaseInfo.cpp:351
bool IsBufferInv
Definition AMDGPUBaseInfo.cpp:358
bool has_srsrc
Definition AMDGPUBaseInfo.cpp:356
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:352
uint8_t elements
Definition AMDGPUBaseInfo.cpp:354
bool tfe
Definition AMDGPUBaseInfo.cpp:359
bool has_soffset
Definition AMDGPUBaseInfo.cpp:357
bool has_vaddr
Definition AMDGPUBaseInfo.cpp:355
uint16_t BaseOpcode
Definition AMDGPUBaseInfo.cpp:353
Definition AMDGPUBaseInfo.cpp:371
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:372
bool IsBuffer
Definition AMDGPUBaseInfo.cpp:373
Definition AMDGPUBaseInfo.cpp:389
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:390
Definition AMDGPUBaseInfo.cpp:381
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:382
Definition AMDGPUBaseInfo.cpp:385
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:386
Definition AMDGPUBaseInfo.cpp:393
uint16_t VOPDOp
Definition AMDGPUBaseInfo.cpp:395
uint16_t BaseVOP
Definition AMDGPUBaseInfo.cpp:394
bool CanBeVOPD3X
Definition AMDGPUBaseInfo.cpp:397
bool CanBeVOPDX
Definition AMDGPUBaseInfo.cpp:396
Definition AMDGPUBaseInfo.cpp:400
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:401
bool VOPD3
Definition AMDGPUBaseInfo.cpp:405
uint16_t OpX
Definition AMDGPUBaseInfo.cpp:402
uint16_t Subtarget
Definition AMDGPUBaseInfo.cpp:404
uint16_t OpY
Definition AMDGPUBaseInfo.cpp:403
Definition AMDGPUBaseInfo.cpp:376
bool IsSingle
Definition AMDGPUBaseInfo.cpp:378
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:377
Definition AMDGPUBaseInfo.cpp:408
bool IsTrue16
Definition AMDGPUBaseInfo.cpp:410
uint16_t Opcode
Definition AMDGPUBaseInfo.cpp:409
Represents the counter values to wait for in an s_waitcnt instruction.