LLVM: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Source File
37
38using namespace llvm;
39
40#define DEBUG_TYPE "amdgpu-disassembler"
41
42#define SGPR_MAX \
43 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
44 : AMDGPU::EncValues::SGPR_MAX_SI)
45
47
48AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
49                                       MCContext &Ctx, MCInstrInfo const *MCII)
50 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
51 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
52 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
53
56
57 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
58 createConstantSymbolExpr(Symbol, Code);
59
60 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
61 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
62 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
63}
64
67}
68
75}
76
80 if (OpIdx != -1) {
82 std::advance(I, OpIdx);
84 }
85 return OpIdx;
86}
87
92
93
94 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
95
96 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
99}
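// Worked example (illustrative values, not from the source): a 16-bit
// SOPP immediate Imm = 0xFFFF at Addr = 0x100 gives
// SignExtend64<16>(0xFFFF) * 4 + 4 + 0x100 = -4 + 4 + 0x100 = 0x100,
// i.e. a branch to itself; the "+ 4" accounts for the PC already
// pointing past the 4-byte SOPP instruction when the branch executes.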
100
105 if (DAsm->isGFX12Plus()) {
106 Offset = SignExtend64<24>(Imm);
107 } else if (DAsm->isVI()) {
108 Offset = Imm & 0xFFFFF;
109 } else {
110 Offset = SignExtend64<21>(Imm);
111 }
113}
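// Summary of the SMEM offset encodings handled above: GFX12+ uses a
// signed 24-bit offset, VI an unsigned 20-bit offset, and the remaining
// subtargets a signed 21-bit offset.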
114
118 return addOperand(Inst, DAsm->decodeBoolReg(Val));
119}
120
125 return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
126}
127
131 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
132}
133
134#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
135 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
136 uint64_t , \
137 const MCDisassembler *Decoder) { \
138 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
139 return addOperand(Inst, DAsm->DecoderName(Imm)); \
140 }
141
142
143
144#define DECODE_OPERAND_REG_8(RegClass) \
145 static DecodeStatus Decode##RegClass##RegisterClass( \
146 MCInst &Inst, unsigned Imm, uint64_t , \
147 const MCDisassembler *Decoder) { \
148 assert(Imm < (1 << 8) && "8-bit encoding"); \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
150 return addOperand( \
151 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
152 }
153
154#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
155 ImmWidth) \
156 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t , \
157 const MCDisassembler *Decoder) { \
158 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
159 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
160 return addOperand(Inst, \
161 DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
162 MandatoryLiteral, ImmWidth)); \
163 }
164
167 unsigned Imm, unsigned EncImm,
168 bool MandatoryLiteral, unsigned ImmWidth,
171 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
173 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
174 ImmWidth, Sema));
175}
176
177
178
179#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
180 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
181
182
183
184
185
186template <AMDGPUDisassembler::OpWidthTy OpWidth>
191}
192
193
194template <AMDGPUDisassembler::OpWidthTy OpWidth>
198 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
200}
201
202
203
204
205template <AMDGPUDisassembler::OpWidthTy OpWidth>
208 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
210}
211
212
213
214template <AMDGPUDisassembler::OpWidthTy OpWidth>
218 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
220}
221
222
223
224
225
226
232 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
234}
235
236
237
243 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
245}
246
252 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
254}
255
256
257
258
259
272
286
293
297 assert(isUInt<10>(Imm) && "10-bit encoding expected");
298 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
299
300 bool IsHi = Imm & (1 << 9);
301 unsigned RegIdx = Imm & 0xff;
303 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
304}
305
309 assert(isUInt<8>(Imm) && "8-bit encoding expected");
310
311 bool IsHi = Imm & (1 << 7);
312 unsigned RegIdx = Imm & 0x7f;
314 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
315}
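// In the 16-bit VGPR encodings decoded above, the low bits select the
// 32-bit VGPR and the top bit of the field (bit 9 for the full range,
// bit 7 for the Lo128 variants) selects the high half of that VGPR,
// e.g. Imm = 0x203 (illustrative) names the high 16 bits of v3.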
316
322 assert(isUInt<9>(Imm) && "9-bit encoding expected");
323
326 bool IsHi = Imm & (1 << 7);
327 unsigned RegIdx = Imm & 0x7f;
328 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
329 }
330 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
331 OpWidth, Imm & 0xFF, false, ImmWidth,
333}
334
342 assert(isUInt<9>(Imm) && "9-bit encoding expected");
343
345 bool IsHi = Imm & (1 << 7);
346 unsigned RegIdx = Imm & 0x7f;
347 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
348 }
349 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
350 OpWidth, Imm & 0xFF, true, ImmWidth,
352}
353
359 assert(isUInt<10>(Imm) && "10-bit encoding expected");
360
363 bool IsHi = Imm & (1 << 9);
364 unsigned RegIdx = Imm & 0xff;
365 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
366 }
367 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
368 OpWidth, Imm & 0xFF, false, ImmWidth,
370}
371
375 assert(isUInt<10>(Imm) && "10-bit encoding expected");
377
379
380 bool IsHi = Imm & (1 << 9);
381 unsigned RegIdx = Imm & 0xff;
382 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
383}
384
389 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
390}
391
395 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
396}
397
400 if (OpIdx < 0)
401 return false;
402
404 if (!Op.isReg())
405 return false;
406
407 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
408 auto Reg = Sub ? Sub : Op.getReg();
409 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
410}
411
416 if (!DAsm->isGFX90A()) {
417 Imm &= 511;
418 } else {
419
420
421
422
423
425 uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
427 : AMDGPU::OpName::vdata;
428 const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
433 Imm |= 512;
434 }
435
440 Imm |= 512;
441 }
442 }
443 return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
444}
445
446template <AMDGPUDisassembler::OpWidthTy Opw>
450 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
451}
452
456 assert(Imm < (1 << 9) && "9-bit encoding");
461}
462
463#define DECODE_SDWA(DecName) \
464DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
465
469
474 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
475}
476
477#include "AMDGPUGenDisassemblerTables.inc"
478
479
480
481
482
485 const auto Res =
486 support::endian::read<T, llvm::endianness::little>(Bytes.data());
487 Bytes = Bytes.slice(sizeof(T));
488 return Res;
489}
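// Minimal usage sketch for eatBytes (hypothetical byte values): reading
// a dword consumes four bytes from the front of the ArrayRef.
//   ArrayRef<uint8_t> B = ...;           // {0x78, 0x56, 0x34, 0x12, ...}
//   uint32_t DW = eatBytes<uint32_t>(B); // DW == 0x12345678, B is 4 shorter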
490
494 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
495 Bytes = Bytes.slice(8);
497 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
498 Bytes = Bytes.slice(4);
500}
501
505 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
506 Bytes = Bytes.slice(8);
508 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
509 Bytes = Bytes.slice(8);
511}
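// eat12Bytes and eat16Bytes above assemble a DecoderUInt128 (low qword
// first, little endian) so that the generated decoder tables can match
// 96-bit and 128-bit encodings the same way they match 32/64-bit ones.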
512
517 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
518 Bytes = Bytes_.slice(0, MaxInstBytesNum);
519
520
521
522 Size = std::min((size_t)4, Bytes_.size());
523
524 do {
525
526
527
528
529
532
534 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
536 break;
537
539 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
541 break;
542
545 break;
546
547
548 Bytes = Bytes_.slice(0, MaxInstBytesNum);
549
550 } else if (Bytes.size() >= 16 &&
554 break;
555
556
557 Bytes = Bytes_.slice(0, MaxInstBytesNum);
558 }
559
560 if (Bytes.size() >= 8) {
561 const uint64_t QW = eatBytes<uint64_t>(Bytes);
562
563 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
565 break;
566
567 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
569 break;
570
571 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
573 break;
574
575
576
577
578 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
580 break;
581
582 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
584 break;
585
586 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
588 break;
589
592 break;
593
595 break;
596
598 break;
599
601 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
603 break;
604
606 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
608 break;
609
612 break;
613
616 break;
617
618
619 Bytes = Bytes_.slice(0, MaxInstBytesNum);
620 }
621
622
623 if (Bytes.size() >= 4) {
624 const uint32_t DW = eatBytes<uint32_t>(Bytes);
625
628 break;
629
631 break;
632
634 break;
635
636 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
638 break;
639
640 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
642 break;
643
644 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
646 break;
647
649 break;
650
652 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
654 break;
655
657 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
659 break;
660 }
661
663 } while (false);
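// The do/while(false) cascade above tries the widest encodings first:
// 96-bit tables on GFX11/GFX12 (and a 128-bit table on subtargets with
// the corresponding feature), then the 64-bit tables, then the 32-bit
// tables, re-slicing Bytes from Bytes_ between attempts so each width
// sees the instruction stream from the start.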
664
668
676 -1)
680 }
681
683
685
687 AMDGPU::OpName::src2_modifiers);
688 }
689
690 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
691 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
692
694 AMDGPU::OpName::src2_modifiers);
695 }
696
700 }
701
702 if (MCII->get(MI.getOpcode()).TSFlags &
705 AMDGPU::OpName::cpol);
706 if (CPolPos != -1) {
707 unsigned CPol =
710 if (MI.getNumOperands() <= (unsigned)CPolPos) {
712 AMDGPU::OpName::cpol);
713 } else if (CPol) {
714 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
715 }
716 }
717 }
718
719 if ((MCII->get(MI.getOpcode()).TSFlags &
722
723 int TFEOpIdx =
725 if (TFEOpIdx != -1) {
726 auto *TFEIter = MI.begin();
727 std::advance(TFEIter, TFEOpIdx);
729 }
730 }
731
732 if (MCII->get(MI.getOpcode()).TSFlags &
734 int SWZOpIdx =
736 if (SWZOpIdx != -1) {
737 auto *SWZIter = MI.begin();
738 std::advance(SWZIter, SWZOpIdx);
740 }
741 }
742
744 int VAddr0Idx =
746 int RsrcIdx =
748 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
749 if (VAddr0Idx >= 0 && NSAArgs > 0) {
750 unsigned NSAWords = (NSAArgs + 3) / 4;
751 if (Bytes.size() < 4 * NSAWords)
753 for (unsigned i = 0; i < NSAArgs; ++i) {
754 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
755 auto VAddrRCID =
756 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
758 }
759 Bytes = Bytes.slice(4 * NSAWords);
760 }
761
763 }
764
765 if (MCII->get(MI.getOpcode()).TSFlags &
768
771
774
777
780
782 AMDGPU::OpName::vdst_in);
783 if (VDstIn_Idx != -1) {
784 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
786 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
787 !MI.getOperand(VDstIn_Idx).isReg() ||
788 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
789 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
790 MI.erase(&MI.getOperand(VDstIn_Idx));
793 AMDGPU::OpName::vdst_in);
794 }
795 }
796
797 int ImmLitIdx =
800 if (ImmLitIdx != -1 && !IsSOPK)
802
803 Size = MaxInstBytesNum - Bytes.size();
805}
806
809
810
813 }
814}
815
818 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
819 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
820 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
821 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
822 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
823 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
824 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
825 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
826 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
827 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
828 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
829 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
830 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
831 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
832 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
833 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
834
835
837 }
838}
839
844
846 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
848 if (SDst != -1) {
849
851 AMDGPU::OpName::sdst);
852 } else {
853
855 }
856 }
857}
858
859
860
863 switch (NumRegs) {
864 case 4:
865 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
866 case 6:
868 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
869 case 8:
870
871 return;
872 default:
874 }
875}
876
877
878
879
880
881
883 int BlgpIdx =
885 if (BlgpIdx == -1)
886 return;
887
888 int CbszIdx =
890
891 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
892 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
893
896 if (!AdjustedRegClassOpcode ||
897 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
898 return;
899
900 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
901 int Src0Idx =
903 int Src1Idx =
909}
910
912 unsigned OpSel = 0;
913 unsigned OpSelHi = 0;
914 unsigned NegLo = 0;
915 unsigned NegHi = 0;
916};
917
918
919
920
922 bool IsVOP3P = false) {
924 unsigned Opc = MI.getOpcode();
925 const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
926 AMDGPU::OpName::src1_modifiers,
927 AMDGPU::OpName::src2_modifiers};
928 for (int J = 0; J < 3; ++J) {
930 if (OpIdx == -1)
931 continue;
932
933 unsigned Val = MI.getOperand(OpIdx).getImm();
934
936 if (IsVOP3P) {
940 } else if (J == 0) {
942 }
943 }
944
945 return Modifiers;
946}
947
948
949
951 const unsigned Opc = MI.getOpcode();
953 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
954 constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
955 {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
957 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
959 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
961 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
963 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
966 if (OpIdx == -1 || OpModsIdx == -1)
967 continue;
969 if (!Op.isReg())
970 continue;
971 if (!ConversionRC.contains(Op.getReg()))
972 continue;
974 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
975 unsigned ModVal = OpMods.getImm();
976 if (ModVal & OpSelMask) {
978 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
979 }
980 }
981}
982
983
984
986 constexpr int DST_IDX = 0;
987 auto Opcode = MI.getOpcode();
988 const auto &Desc = MCII->get(Opcode);
990
991 if (OldIdx != -1 && Desc.getOperandConstraint(
997 (void)DST_IDX;
998 return true;
999 }
1000
1001 return false;
1002}
1003
1004
1006 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1009 AMDGPU::OpName::src2_modifiers);
1010}
1011
1013 unsigned Opc = MI.getOpcode();
1014
1015 int VDstInIdx =
1017 if (VDstInIdx != -1)
1019
1020 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1021 if (MI.getNumOperands() < DescNumOps &&
1026 AMDGPU::OpName::op_sel);
1027 } else {
1028
1029 if (MI.getNumOperands() < DescNumOps &&
1032 AMDGPU::OpName::src0_modifiers);
1033
1034 if (MI.getNumOperands() < DescNumOps &&
1037 AMDGPU::OpName::src1_modifiers);
1038 }
1039}
1040
1043
1044 int VDstInIdx =
1046 if (VDstInIdx != -1)
1048
1049 unsigned Opc = MI.getOpcode();
1050 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1051 if (MI.getNumOperands() < DescNumOps &&
1055 AMDGPU::OpName::op_sel);
1056 }
1057}
1058
1059
1060
1061
1063 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1064
1066 AMDGPU::OpName::vdst);
1067
1069 AMDGPU::OpName::vdata);
1070 int VAddr0Idx =
1072 int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
1073 : AMDGPU::OpName::rsrc;
1076 AMDGPU::OpName::dmask);
1077
1079 AMDGPU::OpName::tfe);
1081 AMDGPU::OpName::d16);
1082
1086
1087 assert(VDataIdx != -1);
1088 if (BaseOpcode->BVH) {
1089
1091 return;
1092 }
1093
1094 bool IsAtomic = (VDstIdx != -1);
1097 bool IsNSA = false;
1098 bool IsPartialNSA = false;
1099 unsigned AddrSize = Info->VAddrDwords;
1100
1102 unsigned DimIdx =
1104 int A16Idx =
1108 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1109
1110 AddrSize =
1112
1113
1114
1115 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1116 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1117 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1118 if (!IsNSA) {
1119 if (!IsVSample && AddrSize > 12)
1120 AddrSize = 16;
1121 } else {
1122 if (AddrSize > Info->VAddrDwords) {
1123 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1124
1125
1126 return;
1127 }
1128 IsPartialNSA = true;
1129 }
1130 }
1131 }
1132
1133 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1134 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1135
1136 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1138 DstSize = (DstSize + 1) / 2;
1139 }
1140
1141 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1142 DstSize += 1;
1143
1144 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1145 return;
1146
1147 int NewOpcode =
1149 if (NewOpcode == -1)
1150 return;
1151
1152
1154 if (DstSize != Info->VDataDwords) {
1155 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1156
1157
1158 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1160 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1161
1164 if (!NewVdata) {
1165
1166
1167 return;
1168 }
1169 }
1170
1171
1172
1173 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1175 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1176 AddrSize != Info->VAddrDwords) {
1177 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1179 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1180
1181 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1184 if (!NewVAddrSA)
1185 return;
1186 }
1187
1188 MI.setOpcode(NewOpcode);
1189
1190 if (NewVdata != AMDGPU::NoRegister) {
1192
1193 if (IsAtomic) {
1194
1196 }
1197 }
1198
1199 if (NewVAddrSA) {
1201 } else if (IsNSA) {
1202 assert(AddrSize <= Info->VAddrDwords);
1203 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1204 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1205 }
1206}
1207
1208
1209
1210
1212 unsigned Opc = MI.getOpcode();
1213 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1215
1216 if (MI.getNumOperands() < DescNumOps &&
1219
1220 if (MI.getNumOperands() < DescNumOps &&
1223 AMDGPU::OpName::op_sel);
1224 if (MI.getNumOperands() < DescNumOps &&
1227 AMDGPU::OpName::op_sel_hi);
1228 if (MI.getNumOperands() < DescNumOps &&
1231 AMDGPU::OpName::neg_lo);
1232 if (MI.getNumOperands() < DescNumOps &&
1235 AMDGPU::OpName::neg_hi);
1236}
1237
1238
1240 unsigned Opc = MI.getOpcode();
1241 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1242
1243 if (MI.getNumOperands() < DescNumOps &&
1246
1247 if (MI.getNumOperands() < DescNumOps &&
1250 AMDGPU::OpName::src0_modifiers);
1251
1252 if (MI.getNumOperands() < DescNumOps &&
1255 AMDGPU::OpName::src1_modifiers);
1256}
1257
1259 unsigned Opc = MI.getOpcode();
1260 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1261
1263
1264 if (MI.getNumOperands() < DescNumOps &&
1268 AMDGPU::OpName::op_sel);
1269 }
1270}
1271
1273 assert(HasLiteral && "Should have decoded a literal");
1275 unsigned DescNumOps = Desc.getNumOperands();
1277 AMDGPU::OpName::immDeferred);
1278 assert(DescNumOps == MI.getNumOperands());
1279 for (unsigned I = 0; I < DescNumOps; ++I) {
1280 auto &Op = MI.getOperand(I);
1281 auto OpType = Desc.operands()[I].OperandType;
1285 IsDeferredOp)
1286 Op.setImm(Literal);
1287 }
1288}
1289
1293}
1294
1295inline
1297 const Twine& ErrMsg) const {
1299
1300
1301
1303}
1304
1305inline
1308}
1309
1310inline
1312 unsigned Val) const {
1313 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1314 if (Val >= RegCl.getNumRegs())
1316 ": unknown register " + Twine(Val));
1318}
1319
1320inline
1322 unsigned Val) const {
1323
1324
1325 int shift = 0;
1326 switch (SRegClassID) {
1327 case AMDGPU::SGPR_32RegClassID:
1328 case AMDGPU::TTMP_32RegClassID:
1329 break;
1330 case AMDGPU::SGPR_64RegClassID:
1331 case AMDGPU::TTMP_64RegClassID:
1332 shift = 1;
1333 break;
1334 case AMDGPU::SGPR_96RegClassID:
1335 case AMDGPU::TTMP_96RegClassID:
1336 case AMDGPU::SGPR_128RegClassID:
1337 case AMDGPU::TTMP_128RegClassID:
1338
1339
1340 case AMDGPU::SGPR_256RegClassID:
1341 case AMDGPU::TTMP_256RegClassID:
1342
1343
1344 case AMDGPU::SGPR_288RegClassID:
1345 case AMDGPU::TTMP_288RegClassID:
1346 case AMDGPU::SGPR_320RegClassID:
1347 case AMDGPU::TTMP_320RegClassID:
1348 case AMDGPU::SGPR_352RegClassID:
1349 case AMDGPU::TTMP_352RegClassID:
1350 case AMDGPU::SGPR_384RegClassID:
1351 case AMDGPU::TTMP_384RegClassID:
1352 case AMDGPU::SGPR_512RegClassID:
1353 case AMDGPU::TTMP_512RegClassID:
1354 shift = 2;
1355 break;
1356
1357
1358 default:
1360 }
1361
1362 if (Val % (1 << shift)) {
1364 << ": scalar reg isn't aligned " << Val;
1365 }
1366
1368}
1369
1371 bool IsHi) const {
1372 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1373 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1374}
1375
1376
1379 if (HasLiteral) {
1382 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1383 if (Literal != Val)
1384 return errOperand(Val, "More than one unique literal is illegal");
1385 }
1386 HasLiteral = true;
1387 Literal = Val;
1389}
1390
1392
1393
1394
1395 if (!HasLiteral) {
1396 if (Bytes.size() < 4) {
1397 return errOperand(0, "cannot read literal, inst bytes left " +
1399 }
1400 HasLiteral = true;
1401 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1402 if (ExtendFP64)
1403 Literal64 <<= 32;
1404 }
1406}
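// Note on the ExtendFP64 path above: a 32-bit literal feeding a 64-bit
// FP operand encodes the high half of the double, so the raw dword is
// shifted into bits 63:32 (e.g. 0x3FF00000 becomes the f64 value 1.0).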
1407
1409 using namespace AMDGPU::EncValues;
1410
1411 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1413 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1414 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1415
1416}
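// Worked example (illustrative): the inline integer encodings map
// 128 -> 0, 129..192 -> 1..64 and 193..208 -> -1..-16, so Imm == 193
// decodes to INLINE_INTEGER_C_POSITIVE_MAX - 193 = -1.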
1417
1419 switch (Imm) {
1420 case 240:
1421 return llvm::bit_cast<uint32_t>(0.5f);
1422 case 241:
1423 return llvm::bit_cast<uint32_t>(-0.5f);
1424 case 242:
1425 return llvm::bit_cast<uint32_t>(1.0f);
1426 case 243:
1427 return llvm::bit_cast<uint32_t>(-1.0f);
1428 case 244:
1429 return llvm::bit_cast<uint32_t>(2.0f);
1430 case 245:
1431 return llvm::bit_cast<uint32_t>(-2.0f);
1432 case 246:
1433 return llvm::bit_cast<uint32_t>(4.0f);
1434 case 247:
1435 return llvm::bit_cast<uint32_t>(-4.0f);
1436 case 248:
1437 return 0x3e22f983;
1438 default:
1440 }
1441}
1442
1444 switch (Imm) {
1445 case 240:
1446 return llvm::bit_cast<uint64_t>(0.5);
1447 case 241:
1448 return llvm::bit_cast<uint64_t>(-0.5);
1449 case 242:
1450 return llvm::bit_cast<uint64_t>(1.0);
1451 case 243:
1452 return llvm::bit_cast<uint64_t>(-1.0);
1453 case 244:
1454 return llvm::bit_cast<uint64_t>(2.0);
1455 case 245:
1456 return llvm::bit_cast<uint64_t>(-2.0);
1457 case 246:
1458 return llvm::bit_cast<uint64_t>(4.0);
1459 case 247:
1460 return llvm::bit_cast<uint64_t>(-4.0);
1461 case 248:
1462 return 0x3fc45f306dc9c882;
1463 default:
1465 }
1466}
1467
1469 switch (Imm) {
1470 case 240:
1471 return 0x3800;
1472 case 241:
1473 return 0xB800;
1474 case 242:
1475 return 0x3C00;
1476 case 243:
1477 return 0xBC00;
1478 case 244:
1479 return 0x4000;
1480 case 245:
1481 return 0xC000;
1482 case 246:
1483 return 0x4400;
1484 case 247:
1485 return 0xC400;
1486 case 248:
1487 return 0x3118;
1488 default:
1490 }
1491}
1492
1494 switch (Imm) {
1495 case 240:
1496 return 0x3F00;
1497 case 241:
1498 return 0xBF00;
1499 case 242:
1500 return 0x3F80;
1501 case 243:
1502 return 0xBF80;
1503 case 244:
1504 return 0x4000;
1505 case 245:
1506 return 0xC000;
1507 case 246:
1508 return 0x4080;
1509 case 247:
1510 return 0xC080;
1511 case 248:
1512 return 0x3E22;
1513 default:
1515 }
1516}
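// In all four tables above, encoding 248 is the hardware 1/(2*pi)
// constant (0x3e22f983 as f32, 0x3fc45f306dc9c882 as f64, 0x3118 as
// f16, 0x3E22 as bf16); encodings 240..247 are the +/-0.5, +/-1.0,
// +/-2.0 and +/-4.0 inline floats.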
1517
1521}
1522
1527
1528
1529
1530
1531
1532 switch (ImmWidth) {
1533 case 0:
1534 case 32:
1536 case 64:
1538 case 16:
1540 default:
1542 }
1543}
1544
1546 using namespace AMDGPU;
1547
1549 switch (Width) {
1550 default:
1554 return VGPR_32RegClassID;
1556 case OPWV232: return VReg_64RegClassID;
1557 case OPW96: return VReg_96RegClassID;
1558 case OPW128: return VReg_128RegClassID;
1559 case OPW192: return VReg_192RegClassID;
1560 case OPW160: return VReg_160RegClassID;
1561 case OPW256: return VReg_256RegClassID;
1562 case OPW288: return VReg_288RegClassID;
1563 case OPW320: return VReg_320RegClassID;
1564 case OPW352: return VReg_352RegClassID;
1565 case OPW384: return VReg_384RegClassID;
1566 case OPW512: return VReg_512RegClassID;
1567 case OPW1024: return VReg_1024RegClassID;
1568 }
1569}
1570
1572 using namespace AMDGPU;
1573
1575 switch (Width) {
1576 default:
1580 return AGPR_32RegClassID;
1582 case OPWV232: return AReg_64RegClassID;
1583 case OPW96: return AReg_96RegClassID;
1584 case OPW128: return AReg_128RegClassID;
1585 case OPW160: return AReg_160RegClassID;
1586 case OPW256: return AReg_256RegClassID;
1587 case OPW288: return AReg_288RegClassID;
1588 case OPW320: return AReg_320RegClassID;
1589 case OPW352: return AReg_352RegClassID;
1590 case OPW384: return AReg_384RegClassID;
1591 case OPW512: return AReg_512RegClassID;
1592 case OPW1024: return AReg_1024RegClassID;
1593 }
1594}
1595
1596
1598 using namespace AMDGPU;
1599
1601 switch (Width) {
1602 default:
1606 return SGPR_32RegClassID;
1608 case OPWV232: return SGPR_64RegClassID;
1609 case OPW96: return SGPR_96RegClassID;
1610 case OPW128: return SGPR_128RegClassID;
1611 case OPW160: return SGPR_160RegClassID;
1612 case OPW256: return SGPR_256RegClassID;
1613 case OPW288: return SGPR_288RegClassID;
1614 case OPW320: return SGPR_320RegClassID;
1615 case OPW352: return SGPR_352RegClassID;
1616 case OPW384: return SGPR_384RegClassID;
1617 case OPW512: return SGPR_512RegClassID;
1618 }
1619}
1620
1622 using namespace AMDGPU;
1623
1625 switch (Width) {
1626 default:
1630 return TTMP_32RegClassID;
1632 case OPWV232: return TTMP_64RegClassID;
1633 case OPW128: return TTMP_128RegClassID;
1634 case OPW256: return TTMP_256RegClassID;
1635 case OPW288: return TTMP_288RegClassID;
1636 case OPW320: return TTMP_320RegClassID;
1637 case OPW352: return TTMP_352RegClassID;
1638 case OPW384: return TTMP_384RegClassID;
1639 case OPW512: return TTMP_512RegClassID;
1640 }
1641}
1642
1644 using namespace AMDGPU::EncValues;
1645
1646 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1647 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1648
1649 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1650}
1651
1653 bool MandatoryLiteral,
1654 unsigned ImmWidth,
1656 using namespace AMDGPU::EncValues;
1657
1658 assert(Val < 1024);
1659
1660 bool IsAGPR = Val & 512;
1661 Val &= 511;
1662
1663 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1666 }
1667 return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1668 Sema);
1669}
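// Layout of the (up to) 10-bit source operand encoding handled above:
// bit 9 selects AGPR vs. VGPR, values 256..511 after masking bit 9 are
// VGPRs (VGPR_MIN == 256), and everything below falls through to
// decodeNonVGPRSrcOp for SGPRs, TTMPs, inline constants and the
// literal marker.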
1670
1673 bool MandatoryLiteral, unsigned ImmWidth,
1675
1676
1677 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1678 using namespace AMDGPU::EncValues;
1679
1681
1682 static_assert(SGPR_MIN == 0);
1684 }
1685
1687 if (TTmpIdx >= 0) {
1689 }
1690
1691 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1693
1694 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1696
1697 if (Val == LITERAL_CONST) {
1698 if (MandatoryLiteral)
1699
1702 }
1703
1704 switch (Width) {
1717 default:
1719 }
1720}
1721
1722
1723
1725 unsigned Val) const {
1726 int VDstXInd =
1728 assert(VDstXInd != -1);
1731 Val |= ~XDstReg & 1;
1734}
1735
1737 using namespace AMDGPU;
1738
1739 switch (Val) {
1740
1751 case 124:
1753 case 125:
1761 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1766 default: break;
1767
1768 }
1769 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1770}
1771
1773 using namespace AMDGPU;
1774
1775 switch (Val) {
1781 case 124:
1784 break;
1785 case 125:
1788 break;
1794 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1798 default: break;
1799 }
1800 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1801}
1802
1804 using namespace AMDGPU;
1805
1806 switch (Val) {
1807 case 124:
1810 break;
1811 case 125:
1814 break;
1815 default:
1816 break;
1817 }
1818 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1819}
1820
1823 unsigned ImmWidth,
1825 using namespace AMDGPU::SDWA;
1826 using namespace AMDGPU::EncValues;
1827
1830
1831
1832 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1833 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1835 Val - SDWA9EncValues::SRC_VGPR_MIN);
1836 }
1837 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1838 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1839 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1841 Val - SDWA9EncValues::SRC_SGPR_MIN);
1842 }
1843 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1844 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1846 Val - SDWA9EncValues::SRC_TTMP_MIN);
1847 }
1848
1849 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1850
1851 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1853
1854 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1856
1858 }
1859 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
1862}
1863
1866}
1867
1870}
1871
1873 using namespace AMDGPU::SDWA;
1874
1877 "SDWAVopcDst should be present only on GFX9+");
1878
1879 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
1880
1881 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1882 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1883
1885 if (TTmpIdx >= 0) {
1888 }
1891 }
1893 }
1894 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
1895}
1896
1898 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
1901}
1902
1905}
1906
1911}
1912
1919
1920 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1921
1922
1923 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1925
1929 return V.Code == Version;
1930 });
1933 if (I == Versions.end())
1935 else
1937
1938 if (W64)
1940 if (W32)
1942 if (MDP)
1944
1946}
1947
1949 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1950}
1951
1953
1955 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1956}
1957
1959
1961
1964}
1965
1968}
1969
1972}
1973
1976}
1977
1980}
1981
1983 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1984}
1985
1988}
1989
1990
1991
1992
1993
1994
1995
1996
1997
2001
2004
2005 if (PopCount == 1) {
2006 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2007 } else {
2008 S << "bits in range ("
2009 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2010 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2011 }
2012
2013 return Result;
2014}
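// Example (illustrative): Mask = 0x30 with BaseBytes = 0 has two set
// bits starting at bit 4, so the returned string is
// "bits in range (5:4)"; a single-bit mask such as 0x80 yields "bit (7)".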
2015
2016#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2017#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2018 do { \
2019 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2020 } while (0)
2021#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2022 do { \
2023 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2024 << GET_FIELD(MASK) << '\n'; \
2025 } while (0)
2026
2027#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2028 do { \
2029 if (FourByteBuffer & (MASK)) { \
2030 return createStringError(std::errc::invalid_argument, \
2031 "kernel descriptor " DESC \
2032 " reserved %s set" MSG, \
2033 getBitRangeFromMask((MASK), 0).c_str()); \
2034 } \
2035 } while (0)
2036
2037#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2038#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2039 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2040#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2041 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2042#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2043 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2044
2045
2048 using namespace amdhsa;
2050
2051
2052
2053
2054
2055
2056 uint32_t GranulatedWorkitemVGPRCount =
2057 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2058
2060 (GranulatedWorkitemVGPRCount + 1) *
2062
2063 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084 uint32_t GranulatedWavefrontSGPRCount =
2085 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2086
2089 "must be zero on gfx10+");
2090
2091 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2093
2094 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2096 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2097 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2098 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2099
2101
2103 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2105 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2107 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2109 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2110
2112
2115 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2116
2118
2121 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2122
2125
2127 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2128
2131 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2132
2134
2137 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
2138
2141 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2142 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2143 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2144 }
2145
2148 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2149
2150 return true;
2151}
2152
2153
2156 using namespace amdhsa;
2160 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2161 else
2162 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2163 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2165 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2167 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2169 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2171 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2173 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2174
2178
2180 ".amdhsa_exception_fp_ieee_invalid_op",
2181 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2183 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2185 ".amdhsa_exception_fp_ieee_div_zero",
2186 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2188 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2190 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2192 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2194 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2195
2197
2198 return true;
2199}
2200
2201
2204 using namespace amdhsa;
2207 KdStream << Indent << ".amdhsa_accum_offset "
2208 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2209 << '\n';
2210
2211 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2212
2214 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2216 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2218
2220 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2222 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2223 } else {
2225 "SHARED_VGPR_COUNT",
2226 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2227 }
2228 } else {
2230 "COMPUTE_PGM_RSRC3",
2231 "must be zero on gfx12+");
2232 }
2233
2234
2237 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2239 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2241 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2244 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2245 } else {
2247 "COMPUTE_PGM_RSRC3",
2248 "must be zero on gfx10");
2249 }
2250
2251
2253 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2254
2255
2258 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2259 } else {
2261 "COMPUTE_PGM_RSRC3",
2262 "must be zero on gfx10 or gfx11");
2263 }
2264
2265
2267 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2268
2269
2272 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2273 } else {
2275 "COMPUTE_PGM_RSRC3",
2276 "must be zero on gfx10");
2277 }
2278 } else if (FourByteBuffer) {
2280 std::errc::invalid_argument,
2281 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2282 }
2283 return true;
2284}
2285#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2286#undef PRINT_DIRECTIVE
2287#undef GET_FIELD
2288#undef CHECK_RESERVED_BITS_IMPL
2289#undef CHECK_RESERVED_BITS
2290#undef CHECK_RESERVED_BITS_MSG
2291#undef CHECK_RESERVED_BITS_DESC
2292#undef CHECK_RESERVED_BITS_DESC_MSG
2293
2294
2295
2297 const char *Msg = "") {
2299 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2301}
2302
2303
2304
2306 unsigned WidthInBytes) {
2307
2308
2310 std::errc::invalid_argument,
2311 "kernel descriptor reserved bits in range (%u:%u) set",
2312 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2313}
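// Example (illustrative): BaseInBytes = 40, WidthInBytes = 8 produces
// "kernel descriptor reserved bits in range (383:320) set".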
2314
2318#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2319 do { \
2320 KdStream << Indent << DIRECTIVE " " \
2321 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2322 } while (0)
2323
2325 uint32_t FourByteBuffer = 0;
2326
2329
2331 DataExtractor DE(Bytes, true, 8);
2332
2333 switch (Cursor.tell()) {
2335 FourByteBuffer = DE.getU32(Cursor);
2336 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2337 << '\n';
2338 return true;
2339
2341 FourByteBuffer = DE.getU32(Cursor);
2342 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2343 << FourByteBuffer << '\n';
2344 return true;
2345
2347 FourByteBuffer = DE.getU32(Cursor);
2348 KdStream << Indent << ".amdhsa_kernarg_size "
2349 << FourByteBuffer << '\n';
2350 return true;
2351
2353
2354 ReservedBytes = DE.getBytes(Cursor, 4);
2355 for (int I = 0; I < 4; ++I) {
2356 if (ReservedBytes[I] != 0)
2358 }
2359 return true;
2360
2362
2363
2364
2365 DE.skip(Cursor, 8);
2366 return true;
2367
2369
2370 ReservedBytes = DE.getBytes(Cursor, 20);
2371 for (int I = 0; I < 20; ++I) {
2372 if (ReservedBytes[I] != 0)
2374 }
2375 return true;
2376
2378 FourByteBuffer = DE.getU32(Cursor);
2380
2382 FourByteBuffer = DE.getU32(Cursor);
2384
2386 FourByteBuffer = DE.getU32(Cursor);
2388
2390 using namespace amdhsa;
2391 TwoByteBuffer = DE.getU16(Cursor);
2392
2394 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2395 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2397 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2399 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2400 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2401 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2403 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2406 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2407 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2408 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2409
2410 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2413
2414
2416 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2418 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2420 }
2423 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2424 }
2425
2428 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2429
2430 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2433 }
2434
2435 return true;
2436
2438 using namespace amdhsa;
2439 TwoByteBuffer = DE.getU16(Cursor);
2440 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2441 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2442 KERNARG_PRELOAD_SPEC_LENGTH);
2443 }
2444
2445 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2446 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2447 KERNARG_PRELOAD_SPEC_OFFSET);
2448 }
2449 return true;
2450
2452
2453 ReservedBytes = DE.getBytes(Cursor, 4);
2454 for (int I = 0; I < 4; ++I) {
2455 if (ReservedBytes[I] != 0)
2457 }
2458 return true;
2459
2460 default:
2461 llvm_unreachable("Unhandled index. Case statements cover everything.");
2462 return true;
2463 }
2464#undef PRINT_DIRECTIVE
2465}
2466
2469
2470
2471 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2473 "kernel descriptor must be 64-byte aligned");
2474
2475
2476
2477
2478
2479
2481 uint16_t KernelCodeProperties =
2484 EnableWavefrontSize32 =
2486 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2487 }
2488
2489 std::string Kd;
2491 KdStream << ".amdhsa_kernel " << KdName << '\n';
2492
2494 while (C && C.tell() < Bytes.size()) {
2496
2498
2499 if (!Res)
2500 return Res;
2501 }
2502 KdStream << ".end_amdhsa_kernel\n";
2503 outs() << KdStream.str();
2504 return true;
2505}
2506
2511
2512
2513
2514
2515
2516
2517
2521 "code object v2 is not supported");
2522 }
2523
2524
2527 Size = 64;
2529 }
2530
2531 return false;
2532}
2533
2534const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2535 int64_t Val) {
2538
2539
2540 if (!Sym->isVariable()) {
2542 } else {
2543 int64_t Res = ~Val;
2544 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2545 if (!Valid || Res != Val)
2547 }
2549}
2550
2551
2552
2553
2554
2555
2560
2561 if (!IsBranch) {
2562 return false;
2563 }
2564
2566 if (!Symbols)
2567 return false;
2568
2572 });
2573 if (Result != Symbols->end()) {
2577 return true;
2578 }
2579
2580 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2581 return false;
2582}
2583
2588}
2589
2590
2591
2592
2593
2597 void *DisInfo,
2599 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2600 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2601}
2602
2607}
2608
2614}