LLVM: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Source File

// (File banner comment and #include directives elided in this capture.)

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ... (subtarget validity check elided in this capture)

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus addOperand(MCInst &Inst,
                                                      const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ? MCDisassembler::Success : MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}
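// [Illustrative note, not part of the original source] insertNamedMCOperand
// places Op at the operand index that the instruction description assigns to
// NameIdx. The post-decode fix-ups later in this file use it to materialize
// operands that are not encoded, e.g. (values hypothetical):
//   insertNamedMCOperand(MI, MCOperand::createImm(0),
//                        AMDGPU::OpName::src2_modifiers);
// inserts a zero immediate at the descriptor's src2_modifiers slot.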

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
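// [Illustrative sketch, not part of the original source] The branch immediate
// is a signed 16-bit count of 32-bit words, relative to the instruction that
// follows the branch (hence the "+ 4"). A standalone equivalent of the
// computation above (helper name is hypothetical):
//
//   static int64_t soppBranchTarget(uint16_t Imm, uint64_t Addr) {
//     return static_cast<int16_t>(Imm) * 4 + 4 + Addr;
//   }
//
// e.g. Imm = 0xFFFF (word offset -1) yields Addr itself: -4 + 4 + Addr.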

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 uses a signed 24-bit offset.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI uses an unsigned 20-bit offset.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports a signed 21-bit offset.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
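// [Illustrative note, not part of the original source] The same raw field
// value decodes differently per subtarget. E.g. Imm = 0x1FFFFF (all 21 bits
// set):
//   GFX12+:  SignExtend64<24>(0x1FFFFF) ==  0x1FFFFF  (bit 23 clear)
//   VI:      0x1FFFFF & 0xFFFFF         ==  0x0FFFFF  (unsigned 20-bit)
//   others:  SignExtend64<21>(0x1FFFFF) == -1         (bit 20 is the sign)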

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers: decode directly using a RegClassID.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
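// [Illustrative sketch, not part of the original source] A hypothetical
// instantiation DECODE_SrcOp(DecodeExample, 7, OPW32, Imm, false, 0) would
// expand to roughly:
//
//   static DecodeStatus DecodeExample(MCInst &Inst, unsigned Imm,
//                                     uint64_t /*Addr*/,
//                                     const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 7) && "7-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32,
//                                               Imm, false, 0));
//   }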

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. Imm(7-bit) is the register number; decodeSrcOp
// resolves the register class from OpWidth.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                               const MCDisassembler *Decoder) {
  // ... (body elided in this capture)
}

// Decoder for Src operands that encode registers only (9-bit encoding).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Same, but Imm{9} is forced to 1 to mark the source as an AGPR.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// 10-bit variant: Imm{9} selects between AGPR and VGPR.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoders for Src operands that may be registers or inline/literal
// immediates (9-bit encoding).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// MandatoryLiteral = true: the operand must come from a literal constant.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// ... (DECODE_OPERAND_REG_7 / DECODE_OPERAND_REG_8 and related decoder
// instantiations for the concrete register classes elided in this capture)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus
decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm,
                                     uint64_t /*Addr*/,
                                     const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, true, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
                         const MCRegisterInfo *MRI) {
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // On GFX90A the AGPR bit (Imm{9}) is not encoded directly; infer it from
    // the data/vdst operands that were already decoded for this instruction.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataName = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                     : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    if (IsAGPROperand(Inst, DataName, MRI))
      Imm |= 512;

    if (TSFlags & SIInstrFlags::FLAT) {
      if (IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /*Addr*/,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 16);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  return DecoderUInt128(Lo, Hi);
}
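// [Illustrative sketch, not part of the original source] The eat* helpers
// consume bytes from the front of the stream, little-endian. For example:
//
//   uint8_t Buf[12] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
//                      0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C};
//   ArrayRef<uint8_t> Bytes(Buf);
//   uint64_t Lo = eatBytes<uint64_t>(Bytes); // 0x0807060504030201, 4 left
//   uint32_t Hi = eatBytes<uint32_t>(Bytes); // 0x0C0B0A09, Bytes now empty
//
// eat12Bytes/eat16Bytes apply the same pattern to form a 96/128-bit word.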

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // Try the wide 96-bit encodings first to resolve conflicts with the
    // shorter VOP encodings.
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      // ... (one more 96-bit table attempt elided in this capture)

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      DecoderUInt128 DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 opcodes for v_fma_mix_f32, v_fma_mixlo_f16 and
      // v_fma_mixhi_f16.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      // ... (base GFX8/GFX9/GFX10 64-bit table attempts elided in this
      // capture)

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      // ... (wave64-specific GFX11/GFX12 table attempts elided in this
      // capture)

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      // ... (base GFX8/AMDGPU/GFX9 32-bit table attempts elided in this
      // capture)

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      // ... (GFX10 32-bit table attempt elided in this capture)

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                             MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
        !MI.getOperand(VDstIn_Idx).isReg() ||
        MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}
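// [Illustrative sketch, not part of the original source] A client drives
// getInstruction in a loop; Size reports how many bytes the decoded (or
// skipped) instruction occupied. Assuming an already-constructed
// MCDisassembler `DA` (helper name hypothetical):
//
//   static void disassembleAll(const MCDisassembler &DA,
//                              ArrayRef<uint8_t> Code, uint64_t Addr) {
//     MCInst Inst;
//     uint64_t Size = 0;
//     while (!Code.empty() &&
//            DA.getInstruction(Inst, Size, Code, Addr, nulls()) ==
//                MCDisassembler::Success) {
//       Code = Code.drop_front(Size);
//       Addr += Size;
//       Inst.clear();
//     }
//   }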

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    // No subregister change needed for the full-width case.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats.
/// Rewrite the opcode and source register classes to match the sizes implied
/// by the cbsz/blgp operands.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is provided just for consistency checking.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

// Instructions decode the op_sel/op_sel_hi bits into the src_modifiers
// operands. Copy the selected half into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;

    unsigned RegIdx = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == DST_IDX) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10 the MIMG encoding provided no information about
// VADDR size, so decoded instructions always show the address as one dword;
// this pass rewrites the opcode/operands to the real widths.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
                                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions.
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize,
                            AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the data register to the number of enabled channels.
  MCRegister NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get the first subregister of VData.
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (!NewVdata) {
      // It is possible to encode this such that the low register plus the
      // enabled components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen the vaddr0 address register to the
  // correct size. If using partial NSA on GFX11+, widen the last register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data).
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  convertTrue16OpSel(MI);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char *AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  if (CommentStream)
    *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}
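// [Illustrative note, not part of the original source] VGPR_16 registers are
// interleaved low/high halves, so the 16-bit register index is RegIdx * 2
// plus the half select. E.g. (RegIdx = 3, IsHi = false) -> index 6 (v3.l)
// and (RegIdx = 3, IsHi = true) -> index 7 (v3.h).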

// Decode literals for instructions which always have a literal in the
// encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1/(2*pi)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
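// [Illustrative note, not part of the original source] These are the IEEE-754
// single-precision bit patterns of the hardware inline constants, e.g.
// llvm::bit_cast<uint32_t>(0.5f) == 0x3F000000, and the hard-coded
// 0x3E22F983 is the f32 pattern of 0.15915494f, i.e. 1/(2*pi).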

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1/(2*pi)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800; // 0.5
  case 241:
    return 0xB800; // -0.5
  case 242:
    return 0x3C00; // 1.0
  case 243:
    return 0xBC00; // -1.0
  case 244:
    return 0x4000; // 2.0
  case 245:
    return 0xC000; // -2.0
  case 246:
    return 0x4400; // 4.0
  case 247:
    return 0xC400; // -4.0
  case 248:
    return 0x3118; // 1/(2*pi)
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00; // 0.5
  case 241:
    return 0xBF00; // -0.5
  case 242:
    return 0x3F80; // 1.0
  case 243:
    return 0xBF80; // -1.0
  case 244:
    return 0x4000; // 2.0
  case 245:
    return 0xC000; // -2.0
  case 246:
    return 0x4080; // 4.0
  case 247:
    return 0xC080; // -4.0
  case 248:
    return 0x3E22; // 1/(2*pi)
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW192: return VReg_192RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
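// [Illustrative note, not part of the original source] On GFX9+, trap
// temporaries ttmp0..ttmp15 occupy scalar encodings 108..123, so e.g.
// getTTmpIdx(110) == 2 (ttmp2) while getTTmpIdx(50) == -1 (an ordinary SGPR).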

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width),
                            Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting.
      return MCOperand::createImm(LITERAL_CONST);
    return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  case OPW96:
  case OPW128:
  case OPW256:
  case OPW512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
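// [Illustrative note, not part of the original source] A rough map of the
// source-operand encoding space the two functions above cover (GFX9-style
// numbering; not exhaustive):
//   0..105    SGPRs                     128       integer 0
//   106..123  vcc / ttmp etc.           129..192  inline ints 1..64
//   193..208  inline ints -1..-16       240..247  +-0.5, +-1.0, +-2.0, +-4.0
//   248       1/(2*pi)                  255       literal (next dword)
//   256..511  VGPRs v0..v255 (with 512+ marking AGPRs in decodeSrcOp)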

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = this->OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // ... (entries for flat_scratch, xnack_mask, vcc, tba and tma halves
  // elided in this capture)
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  // ... (exec and src_shared/src_private entries elided in this capture)
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  // ... (vccz/execz/scc/lds_direct entries elided in this capture)
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // ... (vcc, flat_scratch, xnack_mask and exec pair entries elided in this
  // capture)
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  // ...
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  // ...
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  default:
    break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  }
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    if (Val > SGPR_MAX) {
      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
    }
    return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val);
  }
  return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
             ? decodeSrcOp(OPW32, Val)
             : decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  return MCOperand::createImm(Val);
}

MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
  // ... (encoding-field helper aliases elided in this capture; Version is
  // bits [7:0], while W64/W32/MDP are bits 13/14/15, matching the
  // UC_VERSION_*_BIT constants registered in the constructor)

  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  // Decode into a plain immediate if any reserved bits are set.
  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return MCOperand::createImm(Imm);

  const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
  auto I = find_if(Versions,
                   [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
                     return V.Code == Version;
                   });
  MCContext &Ctx = getContext();
  const MCExpr *E;
  if (I == Versions.end())
    E = MCConstantExpr::create(Version, Ctx);
  else
    E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);

  if (W64)
    E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
  if (W32)
    E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
  if (MDP)
    E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);

  return MCOperand::createExpr(E);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//

/// Print a string describing the reserved bit range specified by Mask with
/// offset BaseBytes for use in error comments. Mask is a single continuous
/// range of 1s surrounded by zeros.
static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
  SmallString<32> Result;
  raw_svector_ostream S(Result);

  int TrailingZeros = llvm::countr_zero(Mask);
  int PopCount = llvm::popcount(Mask);

  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }

  return Result;
}
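// [Illustrative note, not part of the original source] Example outputs:
//   getBitRangeFromMask(0x80000000, 0) -> "bit (31)"
//   getBitRangeFromMask(0x00000600, 0) -> "bits in range (10:9)"
//   getBitRangeFromMask(0x00000600, 4) -> "bits in range (42:41)"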

#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
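// [Illustrative note, not part of the original source] For instance,
// CHECK_RESERVED_BITS_MSG(SOME_MASK, "must be zero") expands to a guarded
// createStringError() whose message embeds the stringized mask name and the
// bit range produced by getBitRangeFromMask(), e.g.
//   "kernel descriptor SOME_MASK reserved bit (21) set, must be zero"
// (mask name and bit number are illustrative).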

// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // ... (explanatory comment elided in this capture)
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
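  // [Illustrative note, not part of the original source] The field stores a
  // granulated count, so the directive is reconstructed as
  // (count + 1) * granule. E.g. with an encoding granule of 4, an encoded
  // value of 5 prints ".amdhsa_next_free_vgpr 24".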

  // ... (explanatory comment elided in this capture)

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (isGFX12Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
                                 "COMPUTE_PGM_RSRC1", "must be zero on gfx12+");
  else
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  if (isGFX12Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
                                 "COMPUTE_PGM_RSRC1", "must be zero on gfx12+");
  else
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  // ... (bulky/debug reserved-bit checks elided in this capture)

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (!isGFX9Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");

  if (!isGFX10Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return true;
}

// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // ... (reserved-bit checks for address watch, memory violation and
  // granulated LDS size elided in this capture)

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_RESERVED0);

  return true;
}

// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (isGFX90A()) {
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
  } else if (isGFX10Plus()) {
    if (!isGFX12Plus()) {
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
    }

    if (isGFX11()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    } else if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");
    }

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
  return true;
}

#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG

/// Create an error object to return from onSymbolStart for reserved kernel
/// descriptor bits being set.
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

/// Create an error object to return from onSymbolStart for reserved kernel
/// descriptor bytes being set.
static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  // Create an error comment in the same format as the "Kernel Descriptor"
  // table in the AMDGPUUsage documentation.
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}

Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
    raw_string_ostream &KdStream) const {
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)

  uint16_t TwoByteBuffer = 0;
  uint32_t FourByteBuffer = 0;

  StringRef ReservedBytes;
  StringRef Indent = "\t";

  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
    }
    return true;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET
    // So far no directive controls this for Code Object V3, so simply skip
    // for disassembly.
    DE.skip(Cursor, 8);
    return true;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
    }
    return true;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);

    // Reserved for GFX9.
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return createReservedKDBitsError(
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
          amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
    } else if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
      PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                      KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
    }

    return true;
  }

  case amdhsa::KERNARG_PRELOAD_OFFSET: {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return true;
  }

  case amdhsa::RESERVED3_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
    }
    return true;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    return true;
  }
#undef PRINT_DIRECTIVE
}

Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {

  // CP microcode requires the kernel descriptor to be 64 aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order.
  if (isGFX10Plus()) {
    uint16_t KernelCodeProperties =
        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                                llvm::endianness::little);
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  }

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);

    cantFail(C.takeError());

    if (!Res)
      return Res;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return true;
}
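// [Illustrative note, not part of the original source] Byte offsets walked by
// the cursor over the 64-byte descriptor (per the amdhsa::*_OFFSET enums
// referenced above):
//    0  group_segment_fixed_size       4  private_segment_fixed_size
//    8  kernarg_size                  12  reserved (4 bytes)
//   16  kernel_code_entry_byte_offset (8 bytes)
//   24  reserved (20 bytes)           44  compute_pgm_rsrc3
//   48  compute_pgm_rsrc1             52  compute_pgm_rsrc2
//   56  kernel_code_properties        58  kernarg_preload
//   60  reserved (4 bytes)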

Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
                                                 uint64_t &Size,
                                                 ArrayRef<uint8_t> Bytes,
                                                 uint64_t Address) const {
  // Right now only the kernel descriptor needs target-specific handling;
  // all other symbols are ignored.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
    // amd_kernel_code_t for Code Object V2.
    Size = 256;
    return createStringError(std::errc::invalid_argument,
                             "code object v2 is not supported");
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }

  return false;
}

const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
  if (!Sym->isVariable()) {
    Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
  } else {
    int64_t Res = ~Val;
    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
      Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  }
  return MCSymbolRefExpr::create(Sym, Ctx);
}
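// [Illustrative note, not part of the original source] The constructor above
// registers the UC_VERSION_* symbols this way; decodeVersionImm() can then
// print a version operand as an expression such as
//   UC_VERSION_GFX11 | UC_VERSION_W64_BIT
// (symbol name illustrative) instead of a bare immediate.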

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
    return Val.Addr == static_cast<uint64_t>(Value) &&
           Val.Type == ELF::STT_NOTYPE;
  });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }

  // Add to the list of referenced addresses, so the caller can synthesize
  // a label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *
createAMDGPUSymbolizer(const Triple &/*TT*/, LLVMOpInfoCallback /*GetOpInfo*/,
                       LLVMSymbolLookupCallback /*SymbolLookUp*/,
                       void *DisInfo, MCContext *Ctx,
                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}

Referenced declarations (generated Doxygen cross-reference index for the entities used in this file):

static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)

#define CHECK_RESERVED_BITS_DESC(MASK, DESC)

static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)

static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)

Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, uint16_t NameIdx)

static DecoderUInt128 eat12Bytes(ArrayRef< uint8_t > &Bytes)

#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)

static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema)

static int64_t getInlineImmValBF16(unsigned Imm)

#define DECODE_SDWA(DecName)

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)

static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

#define DECODE_OPERAND_REG_8(RegClass)

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, AMDGPUDisassembler::OpWidthTy OpWidth, unsigned Imm, unsigned EncImm, bool MandatoryLiteral, unsigned ImmWidth, AMDGPU::OperandSemantics Sema, const MCDisassembler *Decoder)

#define PRINT_DIRECTIVE(DIRECTIVE, MASK)

static DecoderUInt128 eat16Bytes(ArrayRef< uint8_t > &Bytes)

static DecodeStatus decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static int64_t getInlineImmVal32(unsigned Imm)

#define DECODE_OPERAND_REG_7(RegClass, OpWidth)

static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)

#define CHECK_RESERVED_BITS(MASK)

static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static int64_t getInlineImmVal64(unsigned Imm)

static T eatBytes(ArrayRef< uint8_t > &Bytes)

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, AMDGPUDisassembler::OpWidthTy Opw, const MCDisassembler *Decoder)

static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

#define CHECK_RESERVED_BITS_MSG(MASK, MSG)

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)

static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)

static int64_t getInlineImmValF16(unsigned Imm)

static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)

Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)

static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)

#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)

static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")

Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.

static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)

Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...

This file contains declaration for AMDGPU ISA disassembler.

Provides AMDGPU specific target descriptions.

AMDHSA kernel descriptor definitions.

#define AMDHSA_BITS_GET(SRC, MSK)


#define LLVM_EXTERNAL_VISIBILITY


Interface definition for SIRegisterInfo.

void convertVOPC64DPPInst(MCInst &MI) const

bool hasKernargPreload() const

void convertEXPInst(MCInst &MI) const

MCOperand createRegOperand(unsigned int RegId) const

MCOperand decodeSpecialReg64(unsigned Val) const

const char * getRegClassName(unsigned RegClassID) const

Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const

Decode as directives that handle COMPUTE_PGM_RSRC1.

Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const

void convertVOPCDPPInst(MCInst &MI) const

unsigned getVgprClassId(const OpWidthTy Width) const

unsigned getAgprClassId(const OpWidthTy Width) const

MCOperand decodeSpecialReg96Plus(unsigned Val) const

MCOperand decodeSDWASrc32(unsigned Val) const

void setABIVersion(unsigned Version) override

ELF-specific, set the ABI version from the object header.

Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const

Decode as directives that handle COMPUTE_PGM_RSRC2.

MCOperand decodeDpp8FI(unsigned Val) const

void convertMacDPPInst(MCInst &MI) const

MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const

MCOperand decodeBoolReg(unsigned Val) const

void convertDPP8Inst(MCInst &MI) const

MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const

MCOperand errOperand(unsigned V, const Twine &ErrMsg) const

MCOperand decodeVersionImm(unsigned Imm) const

MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val, unsigned ImmWidth, AMDGPU::OperandSemantics Sema) const

Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const

MCOperand decodeSplitBarrier(unsigned Val) const

void convertVOP3DPPInst(MCInst &MI) const

void convertTrue16OpSel(MCInst &MI) const

void convertFMAanyK(MCInst &MI, int ImmLitIdx) const

MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const

MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const

Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const

Decode as directives that handle COMPUTE_PGM_RSRC3.

static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm, AMDGPU::OperandSemantics Sema)

MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const

AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)

MCOperand decodeSpecialReg32(unsigned Val) const

MCOperand decodeLiteralConstant(bool ExtendFP64) const

MCOperand decodeSDWAVopcDst(unsigned Val) const

void convertVINTERPInst(MCInst &MI) const

void convertSDWAInst(MCInst &MI) const

DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const

unsigned getSgprClassId(const OpWidthTy Width) const

static MCOperand decodeIntImmed(unsigned Imm)

void convertMAIInst(MCInst &MI) const

f8f6f4 instructions have different pseudos depending on the used formats.

bool hasArchitectedFlatScratch() const

DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override

Returns the disassembly of a single instruction.

unsigned getTtmpClassId(const OpWidthTy Width) const

void convertMIMGInst(MCInst &MI) const

bool isMacDPP(MCInst &MI) const

int getTTmpIdx(unsigned Val) const

void convertVOP3PDPPInst(MCInst &MI) const

MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const

MCOperand decodeSDWASrc16(unsigned Val) const

Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override

Used to perform separate target specific disassembly for a particular symbol.

bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override

Try to add a symbolic operand instead of Value to the MCInst.

void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override

Try to add a comment on the PC-relative load.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

ArrayRef< T > slice(size_t N, size_t M) const

slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.

This class represents an Operation in the Expression.

Lightweight error class with error context and mandatory checking.

Tagged union holding either a T or a Error.

static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)

Context object for machine code objects.

const MCRegisterInfo * getRegisterInfo() const

void reportWarning(SMLoc L, const Twine &Msg)

MCSymbol * getOrCreateSymbol(const Twine &Name)

Lookup the symbol inside with the specified Name.

Superclass for all disassemblers.

MCContext & getContext() const

const MCSubtargetInfo & STI

raw_ostream * CommentStream

DecodeStatus

Ternary decode status.

Base class for the full range of assembler expressions which are needed for parsing.

Instances of this class represent a single low-level machine instruction.

unsigned getNumOperands() const

unsigned getOpcode() const

void addOperand(const MCOperand Op)

const MCOperand & getOperand(unsigned i) const

Describe properties that are true of each instruction in the target description file.

Interface to description of machine instruction set.

Instances of this class represent operands of the MCInst class.

static MCOperand createExpr(const MCExpr *Val)

static MCOperand createReg(MCRegister Reg)

static MCOperand createImm(int64_t Val)

void setReg(MCRegister Reg)

Set the register number.

MCRegister getReg() const

Returns the register number.

MCRegisterClass - Base class of TargetRegisterClass.

unsigned getRegister(unsigned i) const

getRegister - Return the specified register in the class.

bool contains(MCRegister Reg) const

contains - Return true if the specified register is included in this register class.

MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...

MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const

Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.

uint16_t getEncodingValue(MCRegister Reg) const

Returns the encoding for Reg.

const MCRegisterClass & getRegClass(unsigned i) const

Returns the register class associated with the enumeration value.

MCRegister getSubReg(MCRegister Reg, unsigned Idx) const

Returns the physical register number of sub-register "Index" for physical register RegNo.

Wrapper class representing physical registers. Should be passed by value.

Generic base class for all target subtargets.

bool hasFeature(unsigned Feature) const

static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

Symbolize and annotate disassembled instructions.

Represents a location in source code.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

StringRef - Represent a constant reference to a string, i.e.

Target - Wrapper for Target specific information.

Triple - Helper class for working with autoconf configuration names.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

LLVM Value Representation.

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

std::string & str()

Returns the string's reference.

A raw_ostream that writes to an SmallVector or SmallString.

const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)

The type for the symbol lookup function.

int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)

The type for the operand information call back function.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)

ArrayRef< GFXVersion > getGFXVersions()

LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)

MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)

If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)

bool isGFX10(const MCSubtargetInfo &STI)

LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)

bool isGFX12Plus(const MCSubtargetInfo &STI)

bool hasPackedD16(const MCSubtargetInfo &STI)

bool isVOPC64DPP(unsigned Opc)

unsigned getAMDHSACodeObjectVersion(const Module &M)

bool isGFX9(const MCSubtargetInfo &STI)

LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)

LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)

bool hasG16(const MCSubtargetInfo &STI)

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)

bool isGFX11Plus(const MCSubtargetInfo &STI)

bool isGFX10Plus(const MCSubtargetInfo &STI)

@ OPERAND_REG_IMM_FP32_DEFERRED

@ OPERAND_REG_IMM_FP16_DEFERRED

bool hasGDS(const MCSubtargetInfo &STI)

bool isGFX9Plus(const MCSubtargetInfo &STI)

unsigned hasKernargPreload(const MCSubtargetInfo &STI)

LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)

bool hasVOPD(const MCSubtargetInfo &STI)

const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)


@ KERNEL_CODE_PROPERTIES_OFFSET

@ GROUP_SEGMENT_FIXED_SIZE_OFFSET

@ COMPUTE_PGM_RSRC3_OFFSET

@ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET

@ COMPUTE_PGM_RSRC1_OFFSET

@ COMPUTE_PGM_RSRC2_OFFSET

@ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET

uint16_t read16(const void *P, endianness E)

This is an optimization pass for GlobalISel generic memory operations.

int popcount(T Value) noexcept

Count the number of set bits in a value.

raw_fd_ostream & outs()

This returns a reference to a raw_fd_ostream for standard output.

SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)

Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)

Create formatted StringError object.

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

void cantFail(Error Err, const char *Msg=nullptr)

Report a fatal error if Err is a failure value.

Target & getTheGCNTarget()

The target for GCN GPUs.

std::vector< SymbolInfoTy > SectionSymbolsTy

unsigned M0(unsigned Val)

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Description of the encoding of one expression Op.

static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)

RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.

static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)

RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.