LLVM: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

21#include "llvm/IR/IntrinsicsAMDGPU.h"

22#include "llvm/IR/IntrinsicsR600.h"

30#include

31

32#define GET_INSTRINFO_NAMED_OPS

33#define GET_INSTRMAP_INFO

34#include "AMDGPUGenInstrInfo.inc"

35

39 llvm:🆑:desc("Set default AMDHSA Code Object Version (module flag "

40 "or asm directive still take priority if present)"));

41

42namespace {

43

44

/// Builds a bit mask made of \p Width consecutive one bits whose least
/// significant set bit sits at bit position \p Shift.
///
/// All arithmetic is done on unsigned operands so that a width of 31 bits
/// does not overflow a signed int. Widths and shifts that reach or exceed
/// the number of bits in `unsigned` are handled explicitly, because shifting
/// by that many positions is undefined.
///
/// \param Shift position of the least significant bit of the mask.
/// \param Width number of bits covered by the mask.
/// \returns the mask; 0 when \p Width is 0 or \p Shift is out of range.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Number of bits in `unsigned` (8-bit bytes assumed).
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  if (Shift >= NumBits)
    return 0;
  // A field as wide as the whole word selects every bit.
  const unsigned LowBits = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return LowBits << Shift;
}

48

49

50

51

52unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {

53 unsigned Mask = getBitMask(Shift, Width);

54 return ((Src << Shift) & Mask) | (Dst & ~Mask);

55}

56

57

58

59

60unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {

61 return (Src & getBitMask(Shift, Width)) >> Shift;

62}

63

64

65unsigned getVmcntBitShiftLo(unsigned VersionMajor) {

67}

68

69

70unsigned getVmcntBitWidthLo(unsigned VersionMajor) {

72}

73

74

75unsigned getExpcntBitShift(unsigned VersionMajor) {

77}

78

79

/// \returns the width, in bits, of the Expcnt field. The value is 3 for
/// every \p VersionMajor; the parameter is accepted but not consulted.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  return 3;
}

81

82

83unsigned getLgkmcntBitShift(unsigned VersionMajor) {

85}

86

87

88unsigned getLgkmcntBitWidth(unsigned VersionMajor) {

90}

91

92

/// \returns the bit position of the upper part of the Vmcnt field. The value
/// is 14 for every \p VersionMajor; the parameter is accepted but not
/// consulted.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  return 14;
}

94

95

/// \returns the width, in bits, of the upper part of the Vmcnt field: 2 when
/// \p VersionMajor is 9 or 10, and 0 (no upper part) for any other version.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  const bool HasHighBits = VersionMajor == 9 || VersionMajor == 10;
  return HasHighBits ? 2 : 0;
}

99

100

101unsigned getLoadcntBitWidth(unsigned VersionMajor) {

103}

104

105

106unsigned getSamplecntBitWidth(unsigned VersionMajor) {

108}

109

110

111unsigned getBvhcntBitWidth(unsigned VersionMajor) {

113}

114

115

116unsigned getDscntBitWidth(unsigned VersionMajor) {

118}

119

120

/// \returns the bit position of the Dscnt field. The value is 0 for every
/// \p VersionMajor; the parameter is accepted but not consulted.
unsigned getDscntBitShift(unsigned VersionMajor) {
  return 0;
}

122

123

124unsigned getStorecntBitWidth(unsigned VersionMajor) {

126}

127

128

129unsigned getKmcntBitWidth(unsigned VersionMajor) {

131}

132

133

134unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {

136}

137

138

139unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {

141}

142

143

/// \returns the width, in bits, of the VaSdst field (3). Used together with
/// getVaSdstBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaSdstBitWidth() { return 3; }

145

146

/// \returns the bit position of the VaSdst field (9). Used together with
/// getVaSdstBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaSdstBitShift() { return 9; }

148

149

/// \returns the width, in bits, of the VmVsrc field (3). Used together with
/// getVmVsrcBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVmVsrcBitWidth() { return 3; }

151

152

/// \returns the bit position of the VmVsrc field (2). Used together with
/// getVmVsrcBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVmVsrcBitShift() { return 2; }

154

155

/// \returns the width, in bits, of the VaVdst field (4). Used together with
/// getVaVdstBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaVdstBitWidth() { return 4; }

157

158

/// \returns the bit position of the VaVdst field (12). Used together with
/// getVaVdstBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaVdstBitShift() { return 12; }

160

161

/// \returns the width, in bits, of the VaVcc field (1). Used together with
/// getVaVccBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaVccBitWidth() { return 1; }

163

164

/// \returns the bit position of the VaVcc field (1). Used together with
/// getVaVccBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaVccBitShift() { return 1; }

166

167

/// \returns the width, in bits, of the SaSdst field (1). Used together with
/// getSaSdstBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getSaSdstBitWidth() { return 1; }

169

170

/// \returns the bit position of the SaSdst field (0). Used together with
/// getSaSdstBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getSaSdstBitShift() { return 0; }

172

173

/// \returns the width, in bits, of the VaSsrc field (1). Used together with
/// getVaSsrcBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaSsrcBitWidth() { return 1; }

175

176

/// \returns the bit position of the VaSsrc field (8). Used together with
/// getVaSsrcBitWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getVaSsrcBitShift() { return 8; }

178

179

/// \returns the width, in bits, of the HoldCnt field (1). Used together with
/// getHoldCntBitShift() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getHoldCntWidth() { return 1; }

181

182

/// \returns the bit position of the HoldCnt field (7). Used together with
/// getHoldCntWidth() when the field is packed into or unpacked from an
/// encoded value.
inline constexpr unsigned getHoldCntBitShift() { return 7; }

184

185}

186

187namespace llvm {

188

190

191

192

196

197

201

204 M.getModuleFlag("amdhsa_code_object_version"))) {

205 return (unsigned)Ver->getZExtValue() / 100;

206 }

207

209}

210

214

216 switch (ABIVersion) {

218 return 4;

220 return 5;

222 return 6;

223 default:

225 }

226}

227

230 return 0;

231

232 switch (CodeObjectVersion) {

233 case 4:

235 case 5:

237 case 6:

239 default:

241 Twine(CodeObjectVersion));

242 }

243}

244

246 switch (CodeObjectVersion) {

248 return 48;

251 default:

253 }

254}

255

256

257

259 switch (CodeObjectVersion) {

261 return 24;

264 default:

266 }

267}

268

270 switch (CodeObjectVersion) {

272 return 32;

275 default:

277 }

278}

279

281 switch (CodeObjectVersion) {

283 return 40;

286 default:

288 }

289}

290

291#define GET_MIMGBaseOpcodesTable_IMPL

292#define GET_MIMGDimInfoTable_IMPL

293#define GET_MIMGInfoTable_IMPL

294#define GET_MIMGLZMappingTable_IMPL

295#define GET_MIMGMIPMappingTable_IMPL

296#define GET_MIMGBiasMappingTable_IMPL

297#define GET_MIMGOffsetMappingTable_IMPL

298#define GET_MIMGG16MappingTable_IMPL

299#define GET_MAIInstInfoTable_IMPL

300#define GET_WMMAInstInfoTable_IMPL

301#include "AMDGPUGenSearchableTables.inc"

302

304 unsigned VDataDwords, unsigned VAddrDwords) {

306 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);

307 return Info ? Info->Opcode : -1;

308}

309

314

320 return NewInfo ? NewInfo->Opcode : -1;

321}

322

325 bool IsG16Supported) {

326 unsigned AddrWords = BaseOpcode->NumExtraArgs;

329 if (IsA16)

330 AddrWords += divideCeil(AddrComponents, 2);

331 else

332 AddrWords += AddrComponents;

333

334

335

336

337

338

340 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)

341

342

343

345 else

347 }

348 return AddrWords;

349}

350

361

370

375

380

384

388

392

399

407

412

413#define GET_FP4FP8DstByteSelTable_DECL

414#define GET_FP4FP8DstByteSelTable_IMPL

415

420

426

427#define GET_MTBUFInfoTable_DECL

428#define GET_MTBUFInfoTable_IMPL

429#define GET_MUBUFInfoTable_DECL

430#define GET_MUBUFInfoTable_IMPL

431#define GET_SMInfoTable_DECL

432#define GET_SMInfoTable_IMPL

433#define GET_VOP1InfoTable_DECL

434#define GET_VOP1InfoTable_IMPL

435#define GET_VOP2InfoTable_DECL

436#define GET_VOP2InfoTable_IMPL

437#define GET_VOP3InfoTable_DECL

438#define GET_VOP3InfoTable_IMPL

439#define GET_VOPC64DPPTable_DECL

440#define GET_VOPC64DPPTable_IMPL

441#define GET_VOPC64DPP8Table_DECL

442#define GET_VOPC64DPP8Table_IMPL

443#define GET_VOPCAsmOnlyInfoTable_DECL

444#define GET_VOPCAsmOnlyInfoTable_IMPL

445#define GET_VOP3CAsmOnlyInfoTable_DECL

446#define GET_VOP3CAsmOnlyInfoTable_IMPL

447#define GET_VOPDComponentTable_DECL

448#define GET_VOPDComponentTable_IMPL

449#define GET_VOPDPairs_DECL

450#define GET_VOPDPairs_IMPL

451#define GET_VOPTrue16Table_DECL

452#define GET_VOPTrue16Table_IMPL

453#define GET_True16D16Table_IMPL

454#define GET_WMMAOpcode2AddrMappingTable_DECL

455#define GET_WMMAOpcode2AddrMappingTable_IMPL

456#define GET_WMMAOpcode3AddrMappingTable_DECL

457#define GET_WMMAOpcode3AddrMappingTable_IMPL

458#define GET_getMFMA_F8F6F4_WithSize_DECL

459#define GET_getMFMA_F8F6F4_WithSize_IMPL

460#define GET_isMFMA_F8F6F4Table_IMPL

461#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

462

463#include "AMDGPUGenSearchableTables.inc"

464

467 return Info ? Info->BaseOpcode : -1;

468}

469

472 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);

473 return Info ? Info->Opcode : -1;

474}

475

478 return Info ? Info->elements : 0;

479}

480

483 return Info && Info->has_vaddr;

484}

485

488 return Info && Info->has_srsrc;

489}

490

493 return Info && Info->has_soffset;

494}

495

498 return Info ? Info->BaseOpcode : -1;

499}

500

503 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);

504 return Info ? Info->Opcode : -1;

505}

506

509 return Info ? Info->elements : 0;

510}

511

514 return Info && Info->has_vaddr;

515}

516

519 return Info && Info->has_srsrc;

520}

521

524 return Info && Info->has_soffset;

525}

526

529 return Info && Info->IsBufferInv;

530}

531

536

541

544 return Info || Info->IsSingle;

545}

546

549 return Info || Info->IsSingle;

550}

551

554 return Info || Info->IsSingle;

555}

556

558 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);

559}

560

562

567

570 return Info && Info->is_gfx940_xdl;

571}

572

575 return Info ? Info->is_wmma_xdl : false;

576}

577

579 switch (EncodingVal) {

582 return 6;

584 return 4;

587 default:

588 return 8;

589 }

590

592}

593

595 unsigned BLGP,

596 unsigned F8F8Opcode) {

599 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);

600}

601

603 switch (Fmt) {

606 return 16;

609 return 12;

611 return 8;

612 }

613

615}

616

618 unsigned FmtB,

619 unsigned F8F8Opcode) {

622 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);

623}

624

626 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))

628 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))

630 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))

632 llvm_unreachable("Subtarget generation does not support VOPD!");

633}

634

636 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;

637 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;

640

641

642

643

646 EncodingFamily, VOPD3) != -1;

647 return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};

648 }

649

650 return {false, false};

651}

652

654 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;

655 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;

657 return Info ? Info->VOPDOp : ~0u;

658}

659

663

665 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||

666 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||

667 Opc == AMDGPU::V_MAC_F32_e64_vi ||

668 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||

669 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||

670 Opc == AMDGPU::V_MAC_F16_e64_vi ||

671 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||

672 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||

673 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||

674 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||

675 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||

676 Opc == AMDGPU::V_FMAC_F32_e64_vi ||

677 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||

678 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||

679 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||

680 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||

681 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||

682 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||

683 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||

684 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||

685 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||

686 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||

687 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||

688 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;

689}

690

692 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||

693 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||

694 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||

695 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||

696 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||

697 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||

698 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||

699 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;

700}

701

703 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||

704 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||

705 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||

706 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||

707 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||

708 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||

709 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||

710 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||

711 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||

712 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;

713}

714

716 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||

717 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||

718 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||

719 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||

720 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||

721 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||

722 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||

723 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||

724 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||

725 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||

726 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||

727 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||

728 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||

729 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||

730 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||

731 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||

732 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||

733 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||

734 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;

735}

736

738 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||

739 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||

740 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||

741 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||

742 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||

743 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||

744 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||

745 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;

746}

747

749 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||

750 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;

751}

752

767

772

777 if (Info->HasFP8DstByteSel)

779 if (Info->HasFP4DstByteSel)

781

783}

784

787 return Info ? Info->Opcode3Addr : ~0u;

788}

789

792 return Info ? Info->Opcode2Addr : ~0u;

793}

794

795

796

797

799 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));

800}

801

803 switch (Opc) {

804 default:

805 return 0;

806 case AMDGPU::V_AND_B32_e32:

807 return 0x40;

808 case AMDGPU::V_OR_B32_e32:

809 return 0x54;

810 case AMDGPU::V_XOR_B32_e32:

811 return 0x14;

812 case AMDGPU::V_XNOR_B32_e32:

813 return 0x41;

814 }

815}

816

817int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,

818 bool VOPD3) {

819 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;

820 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;

822 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);

823 return Info ? Info->Opcode : -1;

824}

825

827 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);

829 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);

830 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);

832 return {OpX->BaseVOP, OpY->BaseVOP};

833}

834

836

839

844 HasSrc2Acc = TiedIdx != -1;

846

851 : 1;

853

854 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||

855 Opcode == AMDGPU::V_CNDMASK_B32_e64) {

856

857

858 NumVOPD3Mods = 2;

859 if (IsVOP3)

860 SrcOperandsNum = 3;

862 getNamedOperandIdx(Opcode, OpName::src0))) {

863

864

865 NumVOPD3Mods = SrcOperandsNum;

866 if (HasSrc2Acc)

867 --NumVOPD3Mods;

868 }

869

871 return;

872

874 unsigned CompOprIdx;

875 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {

877 MandatoryLiteralIdx = CompOprIdx;

878 break;

879 }

880 }

881}

882

884 return getNamedOperandIdx(Opcode, OpName::bitop3);

885}

886

889

892

896

897

898 return 0;

899}

900

902 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,

904 bool VOPD3) const {

905

910

912 unsigned BanksMask) -> bool {

915 if (!BaseX)

916 BaseX = X;

917 if (!BaseY)

918 BaseY = Y;

919 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))

920 return true;

921 if (BaseX != X &&

922 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))

923 return true;

924 if (BaseY != Y &&

925 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))

926 return true;

927

928

929

930 return false;

931 };

932

933 unsigned CompOprIdx;

937 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])

938 continue;

939

942 return CompOprIdx;

943

945 continue;

946

948

949

950 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))

951 return CompOprIdx;

952 if (VOPD3)

953 continue;

954 }

955

956 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&

958 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))

959 return CompOprIdx;

960 }

961

962 return {};

963}

964

965

966

967

968

969

970

971

973InstInfo::getRegIndices(unsigned CompIdx,

974 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,

975 bool VOPD3) const {

977

978 const auto &Comp = CompInfo[CompIdx];

980

981 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

982

983 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {

984 unsigned CompSrcIdx = CompOprIdx - DST_NUM;

986 Comp.hasRegSrcOperand(CompSrcIdx)

987 ? GetRegIdx(CompIdx,

988 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))

990 }

992}

993

994}

995

999

1003 const auto &OpXDesc = InstrInfo->get(OpX);

1004 const auto &OpYDesc = InstrInfo->get(OpY);

1009}

1010

1011namespace IsaInfo {

1012

1016 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))

1018 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))

1020}

1021

1023

1024

1025

1027 std::optional XnackRequested;

1028 std::optional SramEccRequested;

1029

1030 for (const std::string &Feature : Features.getFeatures()) {

1031 if (Feature == "+xnack")

1032 XnackRequested = true;

1033 else if (Feature == "-xnack")

1034 XnackRequested = false;

1035 else if (Feature == "+sramecc")

1036 SramEccRequested = true;

1037 else if (Feature == "-sramecc")

1038 SramEccRequested = false;

1039 }

1040

1043

1044 if (XnackRequested) {

1045 if (XnackSupported) {

1046 XnackSetting =

1048 } else {

1049

1050

1051 if (*XnackRequested) {

1052 errs() << "warning: xnack 'On' was requested for a processor that does "

1053 "not support it!\n";

1054 } else {

1055 errs() << "warning: xnack 'Off' was requested for a processor that "

1056 "does not support it!\n";

1057 }

1058 }

1059 }

1060

1061 if (SramEccRequested) {

1062 if (SramEccSupported) {

1063 SramEccSetting =

1065 } else {

1066

1067

1068

1069 if (*SramEccRequested) {

1070 errs() << "warning: sramecc 'On' was requested for a processor that "

1071 "does not support it!\n";

1072 } else {

1073 errs() << "warning: sramecc 'Off' was requested for a processor that "

1074 "does not support it!\n";

1075 }

1076 }

1077 }

1078}

1079

1089

1092 TargetID.split(TargetIDSplit, ':');

1093

1094 for (const auto &FeatureString : TargetIDSplit) {

1095 if (FeatureString.starts_with("xnack"))

1097 if (FeatureString.starts_with("sramecc"))

1099 }

1100}

1101

1103 std::string StringRep;

1105

1106 auto TargetTriple = STI.getTargetTriple();

1108

1109 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()

1110 << '-' << TargetTriple.getOSName() << '-'

1111 << TargetTriple.getEnvironmentName() << '-';

1112

1113 std::string Processor;

1114

1115

1116

1118 Processor = STI.getCPU().str();

1119 else

1122 .str();

1123

1124 std::string Features;

1125 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {

1126

1128 Features += ":sramecc-";

1130 Features += ":sramecc+";

1131

1133 Features += ":xnack-";

1135 Features += ":xnack+";

1136 }

1137

1138 StreamRep << Processor << Features;

1139

1140 return StringRep;

1141}

1142

1145 return 16;

1147 return 32;

1148

1149 return 64;

1150}

1151

1154

1155

1156

1157

1159 BytesPerCU *= 2;

1160

1161 return BytesPerCU;

1162}

1163

1165 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))

1166 return 32768;

1167 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))

1168 return 65536;

1169 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))

1170 return 163840;

1171 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))

1172 return 327680;

1173 return 32768;

1174}

1175

1177

1178

1179

1180

1183 return 4;

1184 }

1185

1186

1187

1189 return 2;

1190

1191

1192

1193 return 4;

1194}

1195

1197 unsigned FlatWorkGroupSize) {

1198 assert(FlatWorkGroupSize != 0);

1200 return 8;

1203 if (N == 1) {

1204

1205 return MaxWaves;

1206 }

1207

1208 unsigned MaxBarriers = 16;

1210 MaxBarriers = 32;

1211

1212 return std::min(MaxWaves / N, MaxBarriers);

1213}

1214

1216

1218

1220 return 8;

1222 return 10;

1224}

1225

1227 unsigned FlatWorkGroupSize) {

1230}

1231

1233

1235

1236 return 1024;

1237}

1238

1240 unsigned FlatWorkGroupSize) {

1242}

1243

1246 if (Version.Major >= 10)

1249 return 16;

1250 return 8;

1251}

1252

1254

1258 return 800;

1259 return 512;

1260}

1261

1265

1267 if (Version.Major >= 10)

1268 return 106;

1270 return 102;

1271 return 104;

1272}

1273

1275 assert(WavesPerEU != 0);

1276

1278 if (Version.Major >= 10)

1279 return 0;

1280

1282 return 0;

1283

1284 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);

1286 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);

1289}

1290

1292 bool Addressable) {

1293 assert(WavesPerEU != 0);

1294

1297 if (Version.Major >= 10)

1298 return Addressable ? AddressableNumSGPRs : 108;

1299 if (Version.Major >= 8 && !Addressable)

1300 AddressableNumSGPRs = 112;

1303 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);

1305 return std::min(MaxNumSGPRs, AddressableNumSGPRs);

1306}

1307

1309 bool FlatScrUsed, bool XNACKUsed) {

1310 unsigned ExtraSGPRs = 0;

1311 if (VCCUsed)

1312 ExtraSGPRs = 2;

1313

1315 if (Version.Major >= 10)

1316 return ExtraSGPRs;

1317

1318 if (Version.Major < 8) {

1319 if (FlatScrUsed)

1320 ExtraSGPRs = 4;

1321 } else {

1322 if (XNACKUsed)

1323 ExtraSGPRs = 4;

1324

1325 if (FlatScrUsed ||

1327 ExtraSGPRs = 6;

1328 }

1329

1330 return ExtraSGPRs;

1331}

1332

1334 bool FlatScrUsed) {

1337}

1338

1340 unsigned Granule) {

1341 return divideCeil(std::max(1u, NumRegs), Granule);

1342}

1343

1349

1351 unsigned DynamicVGPRBlockSize,

1352 std::optional EnableWavefrontSize32) {

1354 return 8;

1355

1356 if (DynamicVGPRBlockSize != 0)

1357 return DynamicVGPRBlockSize;

1358

1359 bool IsWave32 = EnableWavefrontSize32

1360 ? *EnableWavefrontSize32

1362

1364 return IsWave32 ? 24 : 12;

1365

1367 return IsWave32 ? 16 : 8;

1368

1369 return IsWave32 ? 8 : 4;

1370}

1371

1373 std::optional EnableWavefrontSize32) {

1375 return 8;

1376

1377 bool IsWave32 = EnableWavefrontSize32

1378 ? *EnableWavefrontSize32

1380

1382 return IsWave32 ? 16 : 8;

1383

1384 return IsWave32 ? 8 : 4;

1385}

1386

1388

1391 return 512;

1393 return 256;

1396 return IsWave32 ? 1536 : 768;

1397 return IsWave32 ? 1024 : 512;

1398}

1399

1402 if (Features.test(Feature1024AddressableVGPRs))

1403 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;

1404 return 256;

1405}

1406

1408 unsigned DynamicVGPRBlockSize) {

1410 if (Features.test(FeatureGFX90AInsts))

1411 return 512;

1412

1413 if (DynamicVGPRBlockSize != 0)

1414

1417}

1418

1420 unsigned NumVGPRs,

1421 unsigned DynamicVGPRBlockSize) {

1425}

1426

1428 unsigned MaxWaves,

1429 unsigned TotalNumVGPRs) {

1430 if (NumVGPRs < Granule)

1431 return MaxWaves;

1432 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);

1433 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);

1434}

1435

1439 return MaxWaves;

1440

1442 if (SGPRs <= 80)

1443 return 10;

1444 if (SGPRs <= 88)

1445 return 9;

1446 if (SGPRs <= 100)

1447 return 8;

1448 return 7;

1449 }

1450 if (SGPRs <= 48)

1451 return 10;

1452 if (SGPRs <= 56)

1453 return 9;

1454 if (SGPRs <= 64)

1455 return 8;

1456 if (SGPRs <= 72)

1457 return 7;

1458 if (SGPRs <= 80)

1459 return 6;

1460 return 5;

1461}

1462

1464 unsigned DynamicVGPRBlockSize) {

1465 assert(WavesPerEU != 0);

1466

1468 if (WavesPerEU >= MaxWavesPerEU)

1469 return 0;

1470

1472 unsigned AddrsableNumVGPRs =

1475 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

1476

1477 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))

1478 return 0;

1479

1481 DynamicVGPRBlockSize);

1482 if (WavesPerEU < MinWavesPerEU)

1483 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

1484

1485 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);

1486 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);

1487 return std::min(MinNumVGPRs, AddrsableNumVGPRs);

1488}

1489

1491 unsigned DynamicVGPRBlockSize) {

1492 assert(WavesPerEU != 0);

1493

1494 unsigned MaxNumVGPRs =

1497 unsigned AddressableNumVGPRs =

1499 return std::min(MaxNumVGPRs, AddressableNumVGPRs);

1500}

1501

1503 std::optional EnableWavefrontSize32) {

1506 1;

1507}

1508

1510 unsigned NumVGPRs,

1511 unsigned DynamicVGPRBlockSize,

1512 std::optional EnableWavefrontSize32) {

1514 NumVGPRs,

1516}

1517}

1518

1532 } else {

1534 }

1535

1536

1537

1539

1540

1541

1545

1546 if (Version.Major >= 10) {

1550 }

1551}

1552

1556

1560

1566

1570

1572 return C == 'v' || C == 's' || C == 'a';

1573}

1574

1576 char Kind = RegName.front();

1578 return {};

1579

1581 if (RegName.consume_front("[")) {

1582 unsigned Idx, End;

1588 unsigned NumRegs = End - Idx + 1;

1589 if (NumRegs > 1)

1590 return {Kind, Idx, NumRegs};

1591 }

1592 } else {

1593 unsigned Idx;

1596 return {Kind, Idx, 1};

1597 }

1598

1599 return {};

1600}

1601

1602std::tuple<char, unsigned, unsigned>

1605 if (RegName.consume_front("{") || RegName.consume_back("}"))

1606 return {};

1608}

1609

1610std::pair<unsigned, unsigned>

1612 std::pair<unsigned, unsigned> Default,

1613 bool OnlyFirstRequired) {

1615 return {Attr->first, Attr->second.value_or(Default.second)};

1617}

1618

1619std::optional<std::pair<unsigned, std::optional>>

1621 bool OnlyFirstRequired) {

1623 if (A.isStringAttribute())

1624 return std::nullopt;

1625

1627 std::pair<unsigned, std::optional> Ints;

1628 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');

1629 if (Strs.first.trim().getAsInteger(0, Ints.first)) {

1630 Ctx.emitError("can't parse first integer attribute " + Name);

1631 return std::nullopt;

1632 }

1633 unsigned Second = 0;

1634 if (Strs.second.trim().getAsInteger(0, Second)) {

1635 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {

1636 Ctx.emitError("can't parse second integer attribute " + Name);

1637 return std::nullopt;

1638 }

1639 } else {

1640 Ints.second = Second;

1641 }

1642

1643 return Ints;

1644}

1645

1647 unsigned Size,

1649 std::optional<SmallVector> R =

1652}

1653

1654std::optional<SmallVector>

1658

1660 if (A.isValid())

1661 return std::nullopt;

1662 if (A.isStringAttribute()) {

1663 Ctx.emitError(Name + " is not a string attribute");

1664 return std::nullopt;

1665 }

1666

1668

1670 unsigned i = 0;

1671 for (; !S.empty() && i < Size; i++) {

1672 std::pair<StringRef, StringRef> Strs = S.split(',');

1673 unsigned IntVal;

1674 if (Strs.first.trim().getAsInteger(0, IntVal)) {

1675 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +

1676 Name);

1677 return std::nullopt;

1678 }

1679 Vals[i] = IntVal;

1680 S = Strs.second;

1681 }

1682

1684 Ctx.emitError("attribute " + Name +

1685 " has incorrect number of integers; expected " +

1687 return std::nullopt;

1688 }

1689 return Vals;

1690}

1691

1695 auto Low =

1699

1700

1701

1702

1704 if (Low.ule(Val) && High.ugt(Val))

1705 return true;

1706 } else {

1707 if (Low.uge(Val) && High.ult(Val))

1708 return true;

1709 }

1710 }

1711

1712 return false;

1713}

1714

1716 return (1 << (getVmcntBitWidthLo(Version.Major) +

1717 getVmcntBitWidthHi(Version.Major))) -

1718 1;

1719}

1720

1722 return (1 << getLoadcntBitWidth(Version.Major)) - 1;

1723}

1724

1726 return (1 << getSamplecntBitWidth(Version.Major)) - 1;

1727}

1728

1730 return (1 << getBvhcntBitWidth(Version.Major)) - 1;

1731}

1732

1734 return (1 << getExpcntBitWidth(Version.Major)) - 1;

1735}

1736

1738 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;

1739}

1740

1742 return (1 << getDscntBitWidth(Version.Major)) - 1;

1743}

1744

1746 return (1 << getKmcntBitWidth(Version.Major)) - 1;

1747}

1748

1750 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;

1751}

1752

1754 return (1 << getStorecntBitWidth(Version.Major)) - 1;

1755}

1756

1758 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),

1759 getVmcntBitWidthLo(Version.Major));

1760 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),

1761 getExpcntBitWidth(Version.Major));

1762 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),

1763 getLgkmcntBitWidth(Version.Major));

1764 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),

1765 getVmcntBitWidthHi(Version.Major));

1766 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;

1767}

1768

1770 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),

1771 getVmcntBitWidthLo(Version.Major));

1772 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),

1773 getVmcntBitWidthHi(Version.Major));

1774 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);

1775}

1776

1778 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),

1779 getExpcntBitWidth(Version.Major));

1780}

1781

1783 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),

1784 getLgkmcntBitWidth(Version.Major));

1785}

1786

1788 unsigned &Expcnt, unsigned &Lgkmcnt) {

1792}

1793

1799 return Decoded;

1800}

1801

1803 unsigned Vmcnt) {

1805 getVmcntBitWidthLo(Version.Major));

1806 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,

1807 getVmcntBitShiftHi(Version.Major),

1808 getVmcntBitWidthHi(Version.Major));

1809}

1810

1812 unsigned Expcnt) {

1813 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),

1814 getExpcntBitWidth(Version.Major));

1815}

1816

1818 unsigned Lgkmcnt) {

1819 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),

1820 getLgkmcntBitWidth(Version.Major));

1821}

1822

1824 unsigned Expcnt, unsigned Lgkmcnt) {

1830}

1831

1835

1837 bool IsStore) {

1838 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),

1839 getDscntBitWidth(Version.Major));

1840 if (IsStore) {

1841 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),

1842 getStorecntBitWidth(Version.Major));

1843 return Dscnt | Storecnt;

1844 }

1845 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),

1846 getLoadcntBitWidth(Version.Major));

1847 return Dscnt | Loadcnt;

1848}

1849

1853 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),

1854 getLoadcntBitWidth(Version.Major));

1855 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),

1856 getDscntBitWidth(Version.Major));

1857 return Decoded;

1858}

1859

1863 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),

1864 getStorecntBitWidth(Version.Major));

1865 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),

1866 getDscntBitWidth(Version.Major));

1867 return Decoded;

1868}

1869

1871 unsigned Loadcnt) {

1872 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),

1873 getLoadcntBitWidth(Version.Major));

1874}

1875

1877 unsigned Storecnt) {

1878 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),

1879 getStorecntBitWidth(Version.Major));

1880}

1881

1883 unsigned Dscnt) {

1884 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),

1885 getDscntBitWidth(Version.Major));

1886}

1887

1889 unsigned Dscnt) {

1894}

1895

1899

1901 unsigned Storecnt, unsigned Dscnt) {

1906}

1907

1909 const Waitcnt &Decoded) {

1911}

1912

1913

1914

1915

1916

1920 unsigned Enc = 0;

1921 for (int Idx = 0; Idx < Size; ++Idx) {

1922 const auto &Op = Opr[Idx];

1923 if (Op.isSupported(STI))

1924 Enc |= Op.encode(Op.Default);

1925 }

1926 return Enc;

1927}

1928

1930 int Size, unsigned Code,

1931 bool &HasNonDefaultVal,

1933 unsigned UsedOprMask = 0;

1934 HasNonDefaultVal = false;

1935 for (int Idx = 0; Idx < Size; ++Idx) {

1936 const auto &Op = Opr[Idx];

1937 if (Op.isSupported(STI))

1938 continue;

1939 UsedOprMask |= Op.getMask();

1940 unsigned Val = Op.decode(Code);

1941 if (Op.isValid(Val))

1942 return false;

1943 HasNonDefaultVal |= (Val != Op.Default);

1944 }

1945 return (Code & ~UsedOprMask) == 0;

1946}

1947

1949 unsigned Code, int &Idx, StringRef &Name,

1950 unsigned &Val, bool &IsDefault,

1952 while (Idx < Size) {

1953 const auto &Op = Opr[Idx++];

1954 if (Op.isSupported(STI)) {

1955 Name = Op.Name;

1956 Val = Op.decode(Code);

1957 IsDefault = (Val == Op.Default);

1958 return true;

1959 }

1960 }

1961

1962 return false;

1963}

1964

1966 int64_t InputVal) {

1967 if (InputVal < 0 || InputVal > Op.Max)

1969 return Op.encode(InputVal);

1970}

1971

1973 const StringRef Name, int64_t InputVal,

1974 unsigned &UsedOprMask,

// Look for an operand entry whose name matches Name.
// NOTE(review): listing lines 1981, 1986 and 1988 are absent from this copy
// of the loop; restore them from the tree before building.
1977 for (int Idx = 0; Idx < Size; ++Idx) {

1978 const auto &Op = Opr[Idx];

1979 if (Op.Name == Name) {

// A name match that the subtarget does not support is skipped, and the scan
// moves on to later entries. The test was previously inverted, which skipped
// supported matches and let unsupported ones fall through.
1980 if (!Op.isSupported(STI)) {

1982 continue;

1983 }

// Bits already present in UsedOprMask were claimed by an earlier operand.
1984 auto OprMask = Op.getMask();

1985 if (OprMask & UsedOprMask)

1987 UsedOprMask |= OprMask;

1989 }

1990 }

1991 return InvalidId;

1992}

1993

1994

1995

1996

1997

1998namespace DepCtr {

1999

2001 static int Default = -1;

2005}

2006

2010 HasNonDefaultVal, STI);

2011}

2012

2016 IsDefault, STI);

2017}

2018

2024

2026 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());

2027}

2028

2030 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());

2031}

2032

2034 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());

2035}

2036

2038 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());

2039}

2040

2042 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());

2043}

2044

2046 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());

2047}

2048

2050 return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());

2051}

2052

2054 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());

2055}

2056

2061

2063 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());

2064}

2065

2070

2072 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());

2073}

2074

2079

2081 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());

2082}

2083

2088

2090 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());

2091}

2092

2097

2099 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());

2100}

2101

2106

2108 return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());

2109}

2110

2115

2116}

2117

2118

2119

2120

2121

2122namespace Exp {

2123

2129

2130

2140

2141

2144 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {

2145 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);

2146 Name = Val.Name;

2147 return true;

2148 }

2149 }

2150 return false;

2151}

2152

2154

2156 if (Val.MaxIndex == 0 && Name == Val.Name)

2157 return Val.Tgt;

2158

2159 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {

2160 StringRef Suffix = Name.drop_front(Val.Name.size());

2161

2162 unsigned Id;

2163 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)

2165

2166

2167 if (Suffix.size() > 1 && Suffix[0] == '0')

2169

2170 return Val.Tgt + Id;

2171 }

2172 }

2174}

2175

2177 switch (Id) {

2186 default:

2189 return true;

2190 }

2191}

2192

2193}

2194

2195

2196

2197

2198

2199namespace MTBUFFormat {

2200

2204 return Id;

2205 }

2207}

2208

2213

2221

2225 if (Name == lookupTable[Id])

2226 return Id;

2227 }

2229}

2230

2235

2237 unsigned Dfmt;

2238 unsigned Nfmt;

2241}

2242

2246

2250

2255

2260 return Id;

2261 }

2262 } else {

2265 return Id;

2266 }

2267 }

2269}

2270

2276

2280

2287 return Id;

2288 }

2289 } else {

2292 return Id;

2293 }

2294 }

2296}

2297

2301

2307

2308}

2309

2310

2311

2312

2313

2314namespace SendMsg {

2315

2319

2323

2325 bool Strict) {

2327

2328 if (!Strict)

2330

2333 return false;

2334

2336 }

2337

2339}

2340

2344

2345 if (!Strict)

2347

2349 switch (MsgId) {

2356 }

2357 }

2359}

2360

2366

2373

2378 OpId = 0;

2380 } else {

2383 }

2384}

2385

2389

2390}

2391

2392

2393

2394

2395

2397 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);

2398}

2399

2401

2402 return F.getFnAttributeAsParsedInteger(

2403 "amdgpu-color-export",

2405}

2406

2408 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;

2409}

2410

2413 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);

2414

2417

2418 return 0;

2419}

2420

2422 return STI.hasFeature(AMDGPU::FeatureXNACK);

2423}

2424

2426 return STI.hasFeature(AMDGPU::FeatureSRAMECC);

2427}

2428

2430 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&

2431 !STI.hasFeature(AMDGPU::FeatureR128A16);

2432}

2433

2435 return STI.hasFeature(AMDGPU::FeatureA16);

2436}

2437

2439 return STI.hasFeature(AMDGPU::FeatureG16);

2440}

2441

// Feature predicate built from FeatureUnpackedD16VMem and the isCI() check.
// NOTE(review): the expression ends in a dangling `&&` -- its final operand
// (listing line 2444) is missing from this copy. Restore it from the tree and
// confirm the polarity of the isCI() term while doing so.
2443 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && isCI(STI) &&

2445}

2446

2448 return STI.hasFeature(AMDGPU::FeatureGDS);

2449}

2450

2453 if (Version.Major == 10)

2454 return Version.Minor >= 3 ? 13 : 5;

2455 if (Version.Major == 11)

2456 return 5;

2457 if (Version.Major >= 12)

2458 return HasSampler ? 4 : 5;

2459 return 0;

2460}

2461

2464 return 32;

2465 return 16;

2466}

2467

2469 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);

2470}

2471

2473 return STI.hasFeature(AMDGPU::FeatureSeaIslands);

2474}

2475

2477 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);

2478}

2479

2481 return STI.hasFeature(AMDGPU::FeatureGFX9);

2482}

2483

2487

2491

2495

2499

2503

2505

2507 return STI.hasFeature(AMDGPU::FeatureGFX10);

2508}

2509

2513

2517

2519 return STI.hasFeature(AMDGPU::FeatureGFX11);

2520}

2521

2525

2529

2531

2533

2537

2543

2545

2549

2553

2555 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);

2556}

2557

2559 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);

2560}

2561

2563 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);

2564}

2565

2567 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);

2568}

2569

2573

2575 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);

2576}

2577

2579 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);

2580}

2581

2583 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);

2584}

2585

2587 return STI.hasFeature(AMDGPU::FeatureMAIInsts);

2588}

2589

2591 return STI.hasFeature(AMDGPU::FeatureVOPD);

2592}

2593

2595 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);

2596}

2597

2599 return STI.hasFeature(AMDGPU::FeatureKernargPreload);

2600}

2601

2603 int32_t ArgNumVGPR) {

2604 if (has90AInsts && ArgNumAGPR)

2605 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;

2606 return std::max(ArgNumVGPR, ArgNumAGPR);

2607}

2608

2610 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);

2611 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);

2612 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||

2613 Reg == AMDGPU::SCC;

2614}

2615

2619

// MAP_REG2REG expands to a complete `switch (Reg.id())` statement.
//   - The default case returns Reg unchanged.
//   - Every other case is produced by a CASE_* helper macro, so the helpers
//     must be defined at the point where MAP_REG2REG is expanded; the mapping
//     each case performs depends on which helper definitions are active.
//   - FLAT_SCR, FLAT_SCR_LO and FLAT_SCR_HI go through CASE_CI_VI.
//   - All TTMP registers and TTMP tuples go through CASE_VI_GFX9PLUS.
//   - M0 and SGPR_NULL go through CASE_GFXPRE11_GFX11PLUS.
//   - SGPR_NULL64 goes through CASE_GFXPRE11_GFX11PLUS_TO with SGPR_NULL as
//     the result name.
// The expansion refers to `Reg` (and, through the helpers, possibly `STI`)
// from the enclosing scope, and it injects `using namespace AMDGPU;`.
2620#define MAP_REG2REG \

2621 using namespace AMDGPU; \

2622 switch (Reg.id()) { \

2623 default: \

2624 return Reg; \

2625 CASE_CI_VI(FLAT_SCR) \

2626 CASE_CI_VI(FLAT_SCR_LO) \

2627 CASE_CI_VI(FLAT_SCR_HI) \

2628 CASE_VI_GFX9PLUS(TTMP0) \

2629 CASE_VI_GFX9PLUS(TTMP1) \

2630 CASE_VI_GFX9PLUS(TTMP2) \

2631 CASE_VI_GFX9PLUS(TTMP3) \

2632 CASE_VI_GFX9PLUS(TTMP4) \

2633 CASE_VI_GFX9PLUS(TTMP5) \

2634 CASE_VI_GFX9PLUS(TTMP6) \

2635 CASE_VI_GFX9PLUS(TTMP7) \

2636 CASE_VI_GFX9PLUS(TTMP8) \

2637 CASE_VI_GFX9PLUS(TTMP9) \

2638 CASE_VI_GFX9PLUS(TTMP10) \

2639 CASE_VI_GFX9PLUS(TTMP11) \

2640 CASE_VI_GFX9PLUS(TTMP12) \

2641 CASE_VI_GFX9PLUS(TTMP13) \

2642 CASE_VI_GFX9PLUS(TTMP14) \

2643 CASE_VI_GFX9PLUS(TTMP15) \

2644 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \

2645 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \

2646 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \

2647 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \

2648 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \

2649 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \

2650 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \

2651 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \

2652 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \

2653 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \

2654 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \

2655 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \

2656 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \

2657 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \

2658 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \

2659 CASE_VI_GFX9PLUS( \

2660 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \

2661 CASE_GFXPRE11_GFX11PLUS(M0) \

2662 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \

2663 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \

2664 }

2665

// First set of CASE_* helpers for MAP_REG2REG: each maps the generic register
// name `node` to a suffixed variant chosen by a subtarget predicate on STI.

// CASE_CI_VI(node): `case node:` returns node##_ci when isCI(STI), otherwise
// node##_vi.
// NOTE(review): the assert is emitted *before* the `case` label. Inside
// MAP_REG2REG each expansion follows a `return`, and switch dispatch jumps
// straight to the label, so the assert is never executed as written.
2666#define CASE_CI_VI(node) \

2667 assert(!isSI(STI)); \

2668 case node: \

2669 return isCI(STI) ? node##_ci : node##_vi;

2670

// CASE_VI_GFX9PLUS(node): `case node:` returns node##_gfx9plus when
// isGFX9Plus(STI), otherwise node##_vi.
2671#define CASE_VI_GFX9PLUS(node) \

2672 case node: \

2673 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

2674

// CASE_GFXPRE11_GFX11PLUS(node): `case node:` returns node##_gfx11plus when
// isGFX11Plus(STI), otherwise node##_gfxpre11.
2675#define CASE_GFXPRE11_GFX11PLUS(node) \

2676 case node: \

2677 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

2678

// CASE_GFXPRE11_GFX11PLUS_TO(node, result): like CASE_GFXPRE11_GFX11PLUS, but
// the returned variants are built from `result` rather than from `node`.
2679#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \

2680 case node: \

2681 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

2682

2688

2689#undef CASE_CI_VI

2690#undef CASE_VI_GFX9PLUS

2691#undef CASE_GFXPRE11_GFX11PLUS

2692#undef CASE_GFXPRE11_GFX11PLUS_TO

2693

// Second set of CASE_* helpers for MAP_REG2REG: the reverse direction. Each
// pair of suffixed variants becomes two case labels that return the generic
// register name `node`. None of these definitions reads STI.

// CASE_CI_VI(node): node##_ci and node##_vi both return node.
2694#define CASE_CI_VI(node) \

2695 case node##_ci: \

2696 case node##_vi: \

2697 return node;

// CASE_VI_GFX9PLUS(node): node##_vi and node##_gfx9plus both return node.
2698#define CASE_VI_GFX9PLUS(node) \

2699 case node##_vi: \

2700 case node##_gfx9plus: \

2701 return node;

// CASE_GFXPRE11_GFX11PLUS(node): node##_gfx11plus and node##_gfxpre11 both
// return node.
2702#define CASE_GFXPRE11_GFX11PLUS(node) \

2703 case node##_gfx11plus: \

2704 case node##_gfxpre11: \

2705 return node;

// CASE_GFXPRE11_GFX11PLUS_TO expands to nothing here, so the entry that uses
// it in MAP_REG2REG contributes no case label in this direction.
2706#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

2707

2709

2711 switch (Reg.id()) {

2712 case AMDGPU::SRC_SHARED_BASE_LO:

2713 case AMDGPU::SRC_SHARED_BASE:

2714 case AMDGPU::SRC_SHARED_LIMIT_LO:

2715 case AMDGPU::SRC_SHARED_LIMIT:

2716 case AMDGPU::SRC_PRIVATE_BASE_LO:

2717 case AMDGPU::SRC_PRIVATE_BASE:

2718 case AMDGPU::SRC_PRIVATE_LIMIT_LO:

2719 case AMDGPU::SRC_PRIVATE_LIMIT:

2720 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:

2721 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:

2722 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:

2723 return true;

2724 case AMDGPU::SRC_VCCZ:

2725 case AMDGPU::SRC_EXECZ:

2726 case AMDGPU::SRC_SCC:

2727 return true;

2728 case AMDGPU::SGPR_NULL:

2729 return true;

2730 default:

2731 return false;

2732 }

2733}

2734

2735#undef CASE_CI_VI

2736#undef CASE_VI_GFX9PLUS

2737#undef CASE_GFXPRE11_GFX11PLUS

2738#undef CASE_GFXPRE11_GFX11PLUS_TO

2739#undef MAP_REG2REG

2740

2743 unsigned OpType = Desc.operands()[OpNo].OperandType;

2746}

2747

2750 unsigned OpType = Desc.operands()[OpNo].OperandType;

2751 switch (OpType) {

2764 return true;

2765 default:

2766 return false;

2767 }

2768}

2769

2772 unsigned OpType = Desc.operands()[OpNo].OperandType;

2777}

2778

2779

2780

2782 switch (RCID) {

2783 case AMDGPU::VGPR_16RegClassID:

2784 case AMDGPU::VGPR_16_Lo128RegClassID:

2785 case AMDGPU::SGPR_LO16RegClassID:

2786 case AMDGPU::AGPR_LO16RegClassID:

2787 return 16;

2788 case AMDGPU::SGPR_32RegClassID:

2789 case AMDGPU::VGPR_32RegClassID:

2790 case AMDGPU::VGPR_32_Lo256RegClassID:

2791 case AMDGPU::VRegOrLds_32RegClassID:

2792 case AMDGPU::AGPR_32RegClassID:

2793 case AMDGPU::VS_32RegClassID:

2794 case AMDGPU::AV_32RegClassID:

2795 case AMDGPU::SReg_32RegClassID:

2796 case AMDGPU::SReg_32_XM0RegClassID:

2797 case AMDGPU::SRegOrLds_32RegClassID:

2798 return 32;

2799 case AMDGPU::SGPR_64RegClassID:

2800 case AMDGPU::VS_64RegClassID:

2801 case AMDGPU::SReg_64RegClassID:

2802 case AMDGPU::VReg_64RegClassID:

2803 case AMDGPU::AReg_64RegClassID:

2804 case AMDGPU::SReg_64_XEXECRegClassID:

2805 case AMDGPU::VReg_64_Align2RegClassID:

2806 case AMDGPU::AReg_64_Align2RegClassID:

2807 case AMDGPU::AV_64RegClassID:

2808 case AMDGPU::AV_64_Align2RegClassID:

2809 case AMDGPU::VReg_64_Lo256_Align2RegClassID:

2810 case AMDGPU::VS_64_Lo256RegClassID:

2811 return 64;

2812 case AMDGPU::SGPR_96RegClassID:

2813 case AMDGPU::SReg_96RegClassID:

2814 case AMDGPU::VReg_96RegClassID:

2815 case AMDGPU::AReg_96RegClassID:

2816 case AMDGPU::VReg_96_Align2RegClassID:

2817 case AMDGPU::AReg_96_Align2RegClassID:

2818 case AMDGPU::AV_96RegClassID:

2819 case AMDGPU::AV_96_Align2RegClassID:

2820 case AMDGPU::VReg_96_Lo256_Align2RegClassID:

2821 return 96;

2822 case AMDGPU::SGPR_128RegClassID:

2823 case AMDGPU::SReg_128RegClassID:

2824 case AMDGPU::VReg_128RegClassID:

2825 case AMDGPU::AReg_128RegClassID:

2826 case AMDGPU::VReg_128_Align2RegClassID:

2827 case AMDGPU::AReg_128_Align2RegClassID:

2828 case AMDGPU::AV_128RegClassID:

2829 case AMDGPU::AV_128_Align2RegClassID:

2830 case AMDGPU::SReg_128_XNULLRegClassID:

2831 case AMDGPU::VReg_128_Lo256_Align2RegClassID:

2832 return 128;

2833 case AMDGPU::SGPR_160RegClassID:

2834 case AMDGPU::SReg_160RegClassID:

2835 case AMDGPU::VReg_160RegClassID:

2836 case AMDGPU::AReg_160RegClassID:

2837 case AMDGPU::VReg_160_Align2RegClassID:

2838 case AMDGPU::AReg_160_Align2RegClassID:

2839 case AMDGPU::AV_160RegClassID:

2840 case AMDGPU::AV_160_Align2RegClassID:

2841 case AMDGPU::VReg_160_Lo256_Align2RegClassID:

2842 return 160;

2843 case AMDGPU::SGPR_192RegClassID:

2844 case AMDGPU::SReg_192RegClassID:

2845 case AMDGPU::VReg_192RegClassID:

2846 case AMDGPU::AReg_192RegClassID:

2847 case AMDGPU::VReg_192_Align2RegClassID:

2848 case AMDGPU::AReg_192_Align2RegClassID:

2849 case AMDGPU::AV_192RegClassID:

2850 case AMDGPU::AV_192_Align2RegClassID:

2851 case AMDGPU::VReg_192_Lo256_Align2RegClassID:

2852 return 192;

2853 case AMDGPU::SGPR_224RegClassID:

2854 case AMDGPU::SReg_224RegClassID:

2855 case AMDGPU::VReg_224RegClassID:

2856 case AMDGPU::AReg_224RegClassID:

2857 case AMDGPU::VReg_224_Align2RegClassID:

2858 case AMDGPU::AReg_224_Align2RegClassID:

2859 case AMDGPU::AV_224RegClassID:

2860 case AMDGPU::AV_224_Align2RegClassID:

2861 case AMDGPU::VReg_224_Lo256_Align2RegClassID:

2862 return 224;

2863 case AMDGPU::SGPR_256RegClassID:

2864 case AMDGPU::SReg_256RegClassID:

2865 case AMDGPU::VReg_256RegClassID:

2866 case AMDGPU::AReg_256RegClassID:

2867 case AMDGPU::VReg_256_Align2RegClassID:

2868 case AMDGPU::AReg_256_Align2RegClassID:

2869 case AMDGPU::AV_256RegClassID:

2870 case AMDGPU::AV_256_Align2RegClassID:

2871 case AMDGPU::SReg_256_XNULLRegClassID:

2872 case AMDGPU::VReg_256_Lo256_Align2RegClassID:

2873 return 256;

2874 case AMDGPU::SGPR_288RegClassID:

2875 case AMDGPU::SReg_288RegClassID:

2876 case AMDGPU::VReg_288RegClassID:

2877 case AMDGPU::AReg_288RegClassID:

2878 case AMDGPU::VReg_288_Align2RegClassID:

2879 case AMDGPU::AReg_288_Align2RegClassID:

2880 case AMDGPU::AV_288RegClassID:

2881 case AMDGPU::AV_288_Align2RegClassID:

2882 case AMDGPU::VReg_288_Lo256_Align2RegClassID:

2883 return 288;

2884 case AMDGPU::SGPR_320RegClassID:

2885 case AMDGPU::SReg_320RegClassID:

2886 case AMDGPU::VReg_320RegClassID:

2887 case AMDGPU::AReg_320RegClassID:

2888 case AMDGPU::VReg_320_Align2RegClassID:

2889 case AMDGPU::AReg_320_Align2RegClassID:

2890 case AMDGPU::AV_320RegClassID:

2891 case AMDGPU::AV_320_Align2RegClassID:

2892 case AMDGPU::VReg_320_Lo256_Align2RegClassID:

2893 return 320;

2894 case AMDGPU::SGPR_352RegClassID:

2895 case AMDGPU::SReg_352RegClassID:

2896 case AMDGPU::VReg_352RegClassID:

2897 case AMDGPU::AReg_352RegClassID:

2898 case AMDGPU::VReg_352_Align2RegClassID:

2899 case AMDGPU::AReg_352_Align2RegClassID:

2900 case AMDGPU::AV_352RegClassID:

2901 case AMDGPU::AV_352_Align2RegClassID:

2902 case AMDGPU::VReg_352_Lo256_Align2RegClassID:

2903 return 352;

2904 case AMDGPU::SGPR_384RegClassID:

2905 case AMDGPU::SReg_384RegClassID:

2906 case AMDGPU::VReg_384RegClassID:

2907 case AMDGPU::AReg_384RegClassID:

2908 case AMDGPU::VReg_384_Align2RegClassID:

2909 case AMDGPU::AReg_384_Align2RegClassID:

2910 case AMDGPU::AV_384RegClassID:

2911 case AMDGPU::AV_384_Align2RegClassID:

2912 case AMDGPU::VReg_384_Lo256_Align2RegClassID:

2913 return 384;

2914 case AMDGPU::SGPR_512RegClassID:

2915 case AMDGPU::SReg_512RegClassID:

2916 case AMDGPU::VReg_512RegClassID:

2917 case AMDGPU::AReg_512RegClassID:

2918 case AMDGPU::VReg_512_Align2RegClassID:

2919 case AMDGPU::AReg_512_Align2RegClassID:

2920 case AMDGPU::AV_512RegClassID:

2921 case AMDGPU::AV_512_Align2RegClassID:

2922 case AMDGPU::VReg_512_Lo256_Align2RegClassID:

2923 return 512;

2924 case AMDGPU::SGPR_1024RegClassID:

2925 case AMDGPU::SReg_1024RegClassID:

2926 case AMDGPU::VReg_1024RegClassID:

2927 case AMDGPU::AReg_1024RegClassID:

2928 case AMDGPU::VReg_1024_Align2RegClassID:

2929 case AMDGPU::AReg_1024_Align2RegClassID:

2930 case AMDGPU::AV_1024RegClassID:

2931 case AMDGPU::AV_1024_Align2RegClassID:

2932 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:

2933 return 1024;

2934 default:

2936 }

2937}

2938

2942

2945 return true;

2946

2957 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);

2958}

2959

2962 return true;

2963

2964

2965

2966

2967

2968

2969

2970

2971

2972

2983 (Val == 0x3e22f983 && HasInv2Pi);

2984}

2985

2987 if (!HasInv2Pi)

2988 return false;

2990 return true;

2992 return Val == 0x3F00 ||

2993 Val == 0xBF00 ||

2994 Val == 0x3F80 ||

2995 Val == 0xBF80 ||

2996 Val == 0x4000 ||

2997 Val == 0xC000 ||

2998 Val == 0x4080 ||

2999 Val == 0xC080 ||

3000 Val == 0x3E22;

3001}

3002

3006

3008 if (!HasInv2Pi)

3009 return false;

3011 return true;

3013 return Val == 0x3C00 ||

3014 Val == 0xBC00 ||

3015 Val == 0x3800 ||

3016 Val == 0xB800 ||

3017 Val == 0x4000 ||

3018 Val == 0xC000 ||

3019 Val == 0x4400 ||

3020 Val == 0xC400 ||

3021 Val == 0x3118;

3022}

3023

3025

3026

3027

3028

3029

3030

3031

3032

3033

3034

3037 return 128 + Signed;

3038

3040 return 192 + std::abs(Signed);

3041

3042 if (IsFloat) {

3043

3045 case 0x3800: return 240;

3046 case 0xB800: return 241;

3047 case 0x3C00: return 242;

3048 case 0xBC00: return 243;

3049 case 0x4000: return 244;

3050 case 0xC000: return 245;

3051 case 0x4400: return 246;

3052 case 0xC400: return 247;

3053 case 0x3118: return 248;

3054 default: break;

3055 }

3056

3057 } else {

3058

3060 case 0x3F000000: return 240;

3061 case 0xBF000000: return 241;

3062 case 0x3F800000: return 242;

3063 case 0xBF800000: return 243;

3064 case 0x40000000: return 244;

3065 case 0xC0000000: return 245;

3066 case 0x40800000: return 246;

3067 case 0xC0800000: return 247;

3068 case 0x3E22F983: return 248;

3069 default: break;

3070 }

3071

3072 }

3073

3074 return {};

3075}

3076

3077

3078

3082

3083

3084

3088 return 128 + Signed;

3089

3091 return 192 + std::abs(Signed);

3092

3093

3095 case 0x3F00: return 240;

3096 case 0xBF00: return 241;

3097 case 0x3F80: return 242;

3098 case 0xBF80: return 243;

3099 case 0x4000: return 244;

3100 case 0xC000: return 245;

3101 case 0x4080: return 246;

3102 case 0xC080: return 247;

3103 case 0x3E22: return 248;

3104 default: break;

3105 }

3106

3107

3108 return std::nullopt;

3109}

3110

3111

3112

3116

3117

3119 switch (OpType) {

3130 return false;

3131 default:

3133 }

3134}

3135

3136

3140

3141

3145

3146

3150

// With IsFP64 set, the value is accepted only when Lo_32(Val) is zero.
// NOTE(review): this line previously read `return Lo\_32(Val);`. The backslash
// was a markup escape and did not compile, and the leading negation is
// restored to match upstream LLVM -- confirm both against the tree.
3152 if (IsFP64)

3153 return !Lo_32(Val);

3154

3156}

3157

3159 switch (Type) {

3160 default:

3161 break;

3166 return Imm & 0xffff;

3182 }

3183 return Imm;

3184}

3185

3188

3189

3191 switch (CC) {

3194 return true;

3205

3206

3207 return A->hasAttribute(Attribute::InReg) ||

3208 A->hasAttribute(Attribute::ByVal);

3209 default:

3210

3211 return A->hasAttribute(Attribute::InReg);

3212 }

3213}

3214

3216

3218 switch (CC) {

3221 return true;

3232

3233

3234 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||

3236 default:

3237 return CB->paramHasAttr(ArgNo, Attribute::InReg);

3238 }

3239}

3240

3244

3246 int64_t EncodedOffset) {

3249

3252}

3253

3255 int64_t EncodedOffset, bool IsBuffer) {

3257 if (IsBuffer && EncodedOffset < 0)

3258 return false;

3259 return isInt<24>(EncodedOffset);

3260 }

3261

3263}

3264

3266 return (ByteOffset & 3) == 0;

3267}

3268

3272 return ByteOffset;

3273

3275 return ByteOffset >> 2;

3276}

3277

3279 int64_t ByteOffset, bool IsBuffer,

3280 bool HasSOffset) {

3281

3282

3283

3285 return std::nullopt;

3286

3287 if (isGFX12Plus(ST))

3288 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)

3289 : std::nullopt;

3290

3291

3294 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)

3295 : std::nullopt;

3296 }

3297

3299 return std::nullopt;

3300

3303 ? std::optional<int64_t>(EncodedOffset)

3304 : std::nullopt;

3305}

3306

3308 int64_t ByteOffset) {

3310 return std::nullopt;

3311

3313 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)

3314 : std::nullopt;

3315}

3316

3319 return 12;

3320

3322 return 24;

3323 return 13;

3324}

3325

3326namespace {

3327

// Table row keyed by a single intrinsic ID.
3328struct SourceOfDivergence {

// Intrinsic ID used as the lookup key.
3329 unsigned Intr;

3330};

// Returns a pointer to the row for Intr. No definition appears in this file's
// own text; presumably it is supplied by the generated
// "AMDGPUGenSearchableTables.inc" include once GET_SourcesOfDivergence_IMPL
// is defined -- confirm against the TableGen output.
3331const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

3332

3334 unsigned Intr;

3335};

3336const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

3337

3338#define GET_SourcesOfDivergence_IMPL

3339#define GET_UniformIntrinsics_IMPL

3340#define GET_Gfx9BufferFormat_IMPL

3341#define GET_Gfx10BufferFormat_IMPL

3342#define GET_Gfx11PlusBufferFormat_IMPL

3343

3344#include "AMDGPUGenSearchableTables.inc"

3345

3346}

3347

3349 return lookupSourceOfDivergence(IntrID);

3350}

3351

3353 return lookupAlwaysUniform(IntrID);

3354}

3355

3360 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(

3361 BitsPerComp, NumComponents, NumFormat)

3363 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)

3364 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);

3365}

3366

3370 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)

3371 : getGfx9BufferFormatInfo(Format);

3372}

3373

3376 const unsigned VGPRClasses[] = {

3377 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,

3378 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,

3379 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,

3380 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,

3381 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,

3382 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,

3383 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,

3384 AMDGPU::VReg_1024RegClassID};

3385

3386 for (unsigned RCID : VGPRClasses) {

3389 return &RC;

3390 }

3391

3392 return nullptr;

3393}

3394

3396 unsigned Enc = MRI.getEncodingValue(Reg);

3398 return Idx >> 8;

3399}

3400

3403 unsigned Enc = MRI.getEncodingValue(Reg);

3405 if (Idx >= 0x100)

3407

3409 if (!RC)

3411

3412 Idx |= MSBs << 8;

3413 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {

3414

3415 Idx *= 2;

3417 ++Idx;

3418 }

3419

3421}

3422

3423std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>

3425 static const AMDGPU::OpName VOPOps[4] = {

3426 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,

3427 AMDGPU::OpName::vdst};

3428 static const AMDGPU::OpName VDSOps[4] = {

3429 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,

3430 AMDGPU::OpName::vdst};

3431 static const AMDGPU::OpName FLATOps[4] = {

3432 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,

3433 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};

3434 static const AMDGPU::OpName BUFOps[4] = {

3435 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,

3436 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};

3437 static const AMDGPU::OpName VIMGOps[4] = {

3438 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,

3439 AMDGPU::OpName::vdata};

3440

3441

3442

3443

3444 static const AMDGPU::OpName VOPDOpsX[4] = {

3445 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,

3446 AMDGPU::OpName::vdstX};

3447 static const AMDGPU::OpName VOPDOpsY[4] = {

3448 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,

3449 AMDGPU::OpName::vdstY};

3450

3451

3452 static const AMDGPU::OpName VOP2MADMKOps[4] = {

3453 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,

3454 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};

3455 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {

3456 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,

3457 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};

3458 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {

3459 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,

3460 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

3461

3462 unsigned TSFlags = Desc.TSFlags;

3463

3464 if (TSFlags &

3467 switch (Desc.getOpcode()) {

3468

3469 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:

3470 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:

3471 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:

3472 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:

3473 return {};

3474 case AMDGPU::V_FMAMK_F16:

3475 case AMDGPU::V_FMAMK_F16_t16:

3476 case AMDGPU::V_FMAMK_F16_t16_gfx12:

3477 case AMDGPU::V_FMAMK_F16_fake16:

3478 case AMDGPU::V_FMAMK_F16_fake16_gfx12:

3479 case AMDGPU::V_FMAMK_F32:

3480 case AMDGPU::V_FMAMK_F32_gfx12:

3481 case AMDGPU::V_FMAMK_F64:

3482 case AMDGPU::V_FMAMK_F64_gfx1250:

3483 return {VOP2MADMKOps, nullptr};

3484 default:

3485 break;

3486 }

3487 return {VOPOps, nullptr};

3488 }

3489

3491 return {VDSOps, nullptr};

3492

3494 return {FLATOps, nullptr};

3495

3497 return {BUFOps, nullptr};

3498

3500 return {VIMGOps, nullptr};

3501

3504 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,

3505 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};

3506 }

3507

3509

3511 llvm_unreachable("Sample and export VGPR lowering is not implemented and"

3512 " these instructions are not expected on gfx1250");

3513

3514 return {};

3515}

3516

3519

3523 return false;

3524

3525

3528

3529

3532

3533 return false;

3534}

3535

3538 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {

3539 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);

3540 if (Idx == -1)

3541 continue;

3542

3546 if (RegClass == AMDGPU::VReg_64RegClassID ||

3547 RegClass == AMDGPU::VReg_64_Align2RegClassID)

3548 return true;

3549 }

3550

3551 return false;

3552}

3553

3555 switch (Opc) {

3556 case AMDGPU::V_MUL_LO_U32_e64:

3557 case AMDGPU::V_MUL_LO_U32_e64_dpp:

3558 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:

3559 case AMDGPU::V_MUL_HI_U32_e64:

3560 case AMDGPU::V_MUL_HI_U32_e64_dpp:

3561 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:

3562 case AMDGPU::V_MUL_HI_I32_e64:

3563 case AMDGPU::V_MUL_HI_I32_e64_dpp:

3564 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:

3565 case AMDGPU::V_MAD_U32_e64:

3566 case AMDGPU::V_MAD_U32_e64_dpp:

3567 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:

3568 return true;

3569 default:

3570 return false;

3571 }

3572}

3573

3576 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))

3577 return false;

3578

3580 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);

3581

3583}

3584

3586 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))

3587 return 64;

3588 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))

3589 return 128;

3590 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))

3591 return 320;

3592 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))

3593 return 512;

3594 return 64;

3595}

3596

3598 switch (Opc) {

3599 case AMDGPU::V_PK_ADD_F32:

3600 case AMDGPU::V_PK_ADD_F32_gfx12:

3601 case AMDGPU::V_PK_MUL_F32:

3602 case AMDGPU::V_PK_MUL_F32_gfx12:

3603 case AMDGPU::V_PK_FMA_F32:

3604 case AMDGPU::V_PK_FMA_F32_gfx12:

3605 return true;

3606 default:

3607 return false;

3608 }

3609}

3610

3615

3619

3622 return "";

3624 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;

3625 return Buffer.c_str();

3626 }

3628 OS << EncoVariableDims << ',' << EncoVariableDims << ','

3629 << EncoVariableDims;

3630 return Buffer.c_str();

3631 }

3633 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];

3634 return Buffer.c_str();

3635 }

3636 }

3638}

3639

// Parsed attribute values; the element type is `unsigned`, matching the
// `unsigned V` predicates applied to *Attr below. The template argument was
// missing from this line, which left `SmallVector` without an element type.
3641 std::optional<SmallVector<unsigned>> Attr =

3644

3645 if (!Attr.has_value())

3647 else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; }))

3649 else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; }))

3651

3654 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

3655

3656 return A;

3657}

3658

3659}

3660

3663 switch (S) {

3665 OS << "Unsupported";

3666 break;

3668 OS << "Any";

3669 break;

3671 OS << "Off";

3672 break;

3674 OS << "On";

3675 break;

3676 }

3677 return OS;

3678}

3679

3680}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))

#define MAP_REG2REG

Definition AMDGPUBaseInfo.cpp:2620

Provides AMDGPU specific target descriptions.

MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.

@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32

This file contains the simple types necessary to represent the attributes associated with functions a...

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Analysis containing CSE Info

This file contains the declarations for the subclasses of Constant, which represent the different fla...

Register const TargetRegisterInfo * TRI

#define S_00B848_MEM_ORDERED(x)

#define S_00B848_WGP_MODE(x)

#define S_00B848_FWD_PROGRESS(x)

unsigned unsigned DefaultVal

static const int BlockSize

static ClusterDimsAttr get(const Function &F)

Definition AMDGPUBaseInfo.cpp:3640

ClusterDimsAttr()=default

std::string to_string() const

Definition AMDGPUBaseInfo.cpp:3616

const std::array< unsigned, 3 > & getDims() const

Definition AMDGPUBaseInfo.cpp:3611

bool isSramEccSupported() const

void setTargetIDFromFeaturesString(StringRef FS)

Definition AMDGPUBaseInfo.cpp:1022

TargetIDSetting getXnackSetting() const

AMDGPUTargetID(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:1013

bool isXnackSupported() const

void setTargetIDFromTargetIDStream(StringRef TargetID)

Definition AMDGPUBaseInfo.cpp:1090

std::string toString() const

Definition AMDGPUBaseInfo.cpp:1102

TargetIDSetting getSramEccSetting() const

unsigned getIndexInParsedOperands(unsigned CompOprIdx) const

Definition AMDGPUBaseInfo.cpp:887

unsigned getIndexOfDstInParsedOperands() const

unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const

int getBitOp3OperandIdx() const

Definition AMDGPUBaseInfo.cpp:883

unsigned getCompParsedSrcOperandsNum() const

std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const

Definition AMDGPUBaseInfo.cpp:901

std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices

This class represents an incoming formal argument to a Function.

Functions, function parameters, and return types can have attributes to indicate how they should be t...

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

CallingConv::ID getCallingConv() const

LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const

Determine whether the argument or parameter has the given attribute.

constexpr bool test(unsigned I) const

unsigned getAddressSpace() const

This is an important class for using LLVM in a threaded context.

Describe properties that are true of each instruction in the target description file.

unsigned getNumOperands() const

Return the number of declared MachineOperands for this MachineInstruction.

ArrayRef< MCOperandInfo > operands() const

bool mayStore() const

Return true if this instruction could possibly modify memory.

bool mayLoad() const

Return true if this instruction could possibly read memory.

unsigned getNumDefs() const

Return the number of MachineOperands that are register definitions.

int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const

Returns the value of the specified operand constraint if it is present.

unsigned getOpcode() const

Return the opcode number for this descriptor.

Interface to description of machine instruction set.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const

Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.

This holds information about one operand of a machine instruction, indicating the register class for ...

MCRegisterClass - Base class of TargetRegisterClass.

unsigned getID() const

getID() - Return the register class ID number.

MCRegister getRegister(unsigned i) const

getRegister - Return the specified register in the class.

bool contains(MCRegister Reg) const

contains - Return true if the specified register is included in this register class.

MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...

Wrapper class representing physical registers. Should be passed by value.

constexpr unsigned id() const

Generic base class for all target subtargets.

bool hasFeature(unsigned Feature) const

const Triple & getTargetTriple() const

const FeatureBitset & getFeatureBits() const

const MDOperand & getOperand(unsigned I) const

unsigned getNumOperands() const

Return number of MDNode operands.

A Module instance is used to store all the information related to an LLVM module.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

bool getAsInteger(unsigned Radix, T &Result) const

Parse the current string as an integer of the specified radix.

constexpr bool empty() const

empty - Check if the string is empty.

constexpr size_t size() const

size - Get the string size.

bool ends_with(StringRef Suffix) const

Check if this string ends with the given Suffix.

Manages the enabling and disabling of subtarget specific features.

const std::vector< std::string > & getFeatures() const

Returns the vector of individual subtarget features.

Triple - Helper class for working with autoconf configuration names.

OSType getOS() const

Get the parsed operating system type of this triple.

ArchType getArch() const

Get the parsed architecture type of this triple.

bool isAMDGCN() const

Tests whether the target is AMDGCN.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

std::string & str()

Returns the string's reference.

A raw_ostream that writes to a SmallVector or SmallString.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

unsigned decodeFieldVaVcc(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2041

unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)

Definition AMDGPUBaseInfo.cpp:2089

unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt)

Definition AMDGPUBaseInfo.cpp:2107

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2013

unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)

Definition AMDGPUBaseInfo.cpp:2098

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)

Definition AMDGPUBaseInfo.cpp:2062

unsigned decodeFieldSaSdst(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2033

unsigned decodeFieldVaSdst(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2037

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)

Definition AMDGPUBaseInfo.cpp:2053

unsigned decodeFieldVaSsrc(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2045

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2019

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)

Definition AMDGPUBaseInfo.cpp:2071

const CustomOperandVal DepCtrInfo[]

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2007

unsigned decodeFieldVaVdst(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2029

unsigned decodeFieldHoldCnt(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2049

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2000

unsigned decodeFieldVmVsrc(unsigned Encoded)

Definition AMDGPUBaseInfo.cpp:2025

unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)

Definition AMDGPUBaseInfo.cpp:2080

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2176

static constexpr ExpTgt ExpTgtInfo[]

Definition AMDGPUBaseInfo.cpp:2131

bool getTgtName(unsigned Id, StringRef &Name, int &Index)

Definition AMDGPUBaseInfo.cpp:2142

unsigned getTgtId(const StringRef Name)

Definition AMDGPUBaseInfo.cpp:2153

@ ET_DUAL_SRC_BLEND_MAX_IDX

constexpr uint32_t VersionMinor

HSA metadata minor version.

constexpr uint32_t VersionMajor

HSA metadata major version.

@ COMPLETION_ACTION_OFFSET

@ MULTIGRID_SYNC_ARG_OFFSET

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)

Definition AMDGPUBaseInfo.cpp:1372

@ FIXED_NUM_SGPRS_FOR_INIT_BUG

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1389

unsigned getArchVGPRAllocGranule()

For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...

Definition AMDGPUBaseInfo.cpp:1387

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition AMDGPUBaseInfo.cpp:1226

unsigned getWavefrontSize(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1143

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)

Definition AMDGPUBaseInfo.cpp:1419

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition AMDGPUBaseInfo.cpp:1196

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1234

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1217

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)

Definition AMDGPUBaseInfo.cpp:1239

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)

Definition AMDGPUBaseInfo.cpp:1308

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1253

unsigned getLocalMemorySize(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1152

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1164

unsigned getEUsPerCU(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1176

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1262

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)

Definition AMDGPUBaseInfo.cpp:1274

static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)

Definition AMDGPUBaseInfo.cpp:1081

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1232

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)

Definition AMDGPUBaseInfo.cpp:1350

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)

Definition AMDGPUBaseInfo.cpp:1291

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)

Definition AMDGPUBaseInfo.cpp:1344

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1215

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)

Definition AMDGPUBaseInfo.cpp:1490

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1244

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)

Definition AMDGPUBaseInfo.cpp:1463

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)

Definition AMDGPUBaseInfo.cpp:1509

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)

Definition AMDGPUBaseInfo.cpp:1502

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)

Definition AMDGPUBaseInfo.cpp:1436

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)

Definition AMDGPUBaseInfo.cpp:1339

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1400

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1255

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)

Definition AMDGPUBaseInfo.cpp:1407

StringLiteral const UfmtSymbolicGFX11[]

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2277

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2302

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2271

unsigned const DfmtNfmt2UFmtGFX10[]

StringLiteral const DfmtSymbolic[]

static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2214

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2243

StringLiteral const NfmtSymbolicGFX10[]

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2236

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2281

StringRef getDfmtName(unsigned Id)

Definition AMDGPUBaseInfo.cpp:2209

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)

Definition AMDGPUBaseInfo.cpp:2247

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2256

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2298

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2231

unsigned const DfmtNfmt2UFmtGFX11[]

StringLiteral const NfmtSymbolicVI[]

StringLiteral const NfmtSymbolicSICI[]

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2222

int64_t getDfmt(const StringRef Name)

Definition AMDGPUBaseInfo.cpp:2201

StringLiteral const UfmtSymbolicGFX10[]

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)

Definition AMDGPUBaseInfo.cpp:2251

uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)

Definition AMDGPUBaseInfo.cpp:2386

bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2367

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2374

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2320

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)

Definition AMDGPUBaseInfo.cpp:2341

StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)

Map from an encoding to the symbolic name for a sendmsg operation.

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2316

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2361

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)

Definition AMDGPUBaseInfo.cpp:2324

Definition AMDGPUBaseInfo.cpp:835

constexpr unsigned VOPD_VGPR_BANK_MASKS[]

constexpr unsigned COMPONENTS_NUM

constexpr unsigned VOPD3_VGPR_BANK_MASKS[]

bool isPackedFP32Inst(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:3597

bool isGCN3Encoding(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2554

bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)

Definition AMDGPUBaseInfo.cpp:2986

bool isGFX10_BEncoding(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2562

bool isInlineValue(MCRegister Reg)

Definition AMDGPUBaseInfo.cpp:2710

bool isGFX10_GFX11(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2510

bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)

Definition AMDGPUBaseInfo.cpp:3118

LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)

Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...

Definition AMDGPUBaseInfo.cpp:1787

bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)

Definition AMDGPUBaseInfo.cpp:3007

bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)

Is Reg a scalar register?

Definition AMDGPUBaseInfo.cpp:2609

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)

Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.

Definition AMDGPUBaseInfo.cpp:3269

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)

Definition AMDGPUBaseInfo.cpp:1876

MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)

If Reg is a pseudo reg, return the correct hardware register given STI; otherwise, return Reg.

Definition AMDGPUBaseInfo.cpp:2683

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:3241

bool isVOPCAsmOnly(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:561

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)

Definition AMDGPUBaseInfo.cpp:303

bool getMTBUFHasSrsrc(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:486

std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)

Definition AMDGPUBaseInfo.cpp:3307

bool getWMMAIsXDL(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:573

uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)

Definition AMDGPUBaseInfo.cpp:602

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:1929

bool isGFX10Before1030(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2550

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)

Does this operand support only inlinable literals?

Definition AMDGPUBaseInfo.cpp:2770

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:785

const int OPR_ID_UNSUPPORTED

bool shouldEmitConstantsToTextSection(const Triple &TT)

Definition AMDGPUBaseInfo.cpp:1567

bool isInlinableLiteralV2I16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3137

int getMTBUFElements(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:476

bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)

Definition AMDGPUBaseInfo.cpp:2616

static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)

Definition AMDGPUBaseInfo.cpp:1965

unsigned getTemporalHintType(const MCInstrDesc TID)

Definition AMDGPUBaseInfo.cpp:753

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)

Definition AMDGPUBaseInfo.cpp:2602

bool isGFX10(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2506

bool isInlinableLiteralV2BF16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3142

unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2462

std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3024

FPType getFPDstSelType(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:773

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)

For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.

Definition AMDGPUBaseInfo.cpp:3317

bool hasA16(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2434

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)

Definition AMDGPUBaseInfo.cpp:3254

bool isGFX12Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2530

unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)

Definition AMDGPUBaseInfo.cpp:2451

const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)

Definition AMDGPUBaseInfo.cpp:3374

bool hasPackedD16(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2442

unsigned getStorecntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1753

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:3585

bool isGFX940(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2578

bool isInlinableLiteralV2F16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3147

bool isHsaAbi(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:198

bool isGFX11(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2518

const int OPR_VAL_INVALID

bool getSMEMIsBuffer(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:537

bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2570

bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)

Checks if Val is inside MD, a !range-like metadata.

Definition AMDGPUBaseInfo.cpp:1692

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)

Definition AMDGPUBaseInfo.cpp:578

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)

Definition AMDGPUBaseInfo.cpp:653

bool isGroupSegment(const GlobalValue *GV)

Definition AMDGPUBaseInfo.cpp:1553

LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)

bool getMTBUFHasSoffset(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:491

bool hasXNACK(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2421

bool isValid32BitLiteral(uint64_t Val, bool IsFP64)

Definition AMDGPUBaseInfo.cpp:3151

static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)

Definition AMDGPUBaseInfo.cpp:1836

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)

Definition AMDGPUBaseInfo.cpp:635

unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)

Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.

Definition AMDGPUBaseInfo.cpp:1823

bool isVOPC64DPP(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:557

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)

Definition AMDGPUBaseInfo.cpp:501

bool getMAIIsGFX940XDL(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:568

bool isSI(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2468

unsigned getDefaultAMDHSACodeObjectVersion()

Definition AMDGPUBaseInfo.cpp:211

bool isReadOnlySegment(const GlobalValue *GV)

Definition AMDGPUBaseInfo.cpp:1561

bool isArgPassedInSGPR(const Argument *A)

Definition AMDGPUBaseInfo.cpp:3186

bool isIntrinsicAlwaysUniform(unsigned IntrID)

Definition AMDGPUBaseInfo.cpp:3352

int getMUBUFBaseOpcode(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:496

unsigned getAMDHSACodeObjectVersion(const Module &M)

Definition AMDGPUBaseInfo.cpp:202

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition AMDGPUBaseInfo.cpp:1782

unsigned getWaitcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1757

LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)

bool getVOP3IsSingle(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:552

bool isGFX9(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2480

bool isDPALU_DPP32BitOpc(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:3554

bool getVOP1IsSingle(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:542

static bool isDwordAligned(uint64_t ByteOffset)

Definition AMDGPUBaseInfo.cpp:3265

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:625

bool isGFX10_AEncoding(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2558

bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)

Is this a KImm operand?

Definition AMDGPUBaseInfo.cpp:2741

bool getHasColorExport(const Function &F)

Definition AMDGPUBaseInfo.cpp:2400

int getMTBUFBaseOpcode(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:465

bool isGFX90A(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2574

unsigned getSamplecntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1725

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)

Definition AMDGPUBaseInfo.cpp:269

std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)

Returns a valid charcode or 0 in the first entry if this is a valid physical register name.

Definition AMDGPUBaseInfo.cpp:1575

bool hasSRAMECC(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2425

bool getHasDepthExport(const Function &F)

Definition AMDGPUBaseInfo.cpp:2407

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2492

bool getMUBUFHasVAddr(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:512

bool isTrue16Inst(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:768

unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)

Definition AMDGPUBaseInfo.cpp:3395

std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)

Definition AMDGPUBaseInfo.cpp:826

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)

Definition AMDGPUBaseInfo.cpp:2960

bool isGFX12(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2526

unsigned getInitialPSInputAddr(const Function &F)

Definition AMDGPUBaseInfo.cpp:2396

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)

Definition AMDGPUBaseInfo.cpp:1811

bool isAsyncStore(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:737

unsigned getDynamicVGPRBlockSize(const Function &F)

Definition AMDGPUBaseInfo.cpp:2411

unsigned getKmcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1745

MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)

If Reg is a low VGPR return a corresponding high VGPR with MSBs set.

Definition AMDGPUBaseInfo.cpp:3401

unsigned getVmcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1715

bool isNotGFX10Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2546

bool hasMAIInsts(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2586

unsigned getBitOp2(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:802

bool isIntrinsicSourceOfDivergence(unsigned IntrID)

Definition AMDGPUBaseInfo.cpp:3348

unsigned getXcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1749

bool isGenericAtomic(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:715

const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)

Definition AMDGPUBaseInfo.cpp:617

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)

Definition AMDGPUBaseInfo.cpp:1860

bool isGFX8Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2496

LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)

Is this literal inlinable, and not one of the values intended for floating point values.

unsigned getLgkmcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1737

bool getMUBUFTfe(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:532

unsigned getBvhcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1729

bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:193

bool hasMIMG_R128(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2429

bool hasGFX10_3Insts(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2566

std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)

Definition AMDGPUBaseInfo.cpp:3424

bool hasG16(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2438

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)

Definition AMDGPUBaseInfo.cpp:323

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)

Definition AMDGPUBaseInfo.cpp:470

unsigned getExpcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1733

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2582

bool getMUBUFHasSoffset(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:522

bool isNotGFX11Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2544

bool isGFX11Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2522

std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3113

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)

Is this a floating-point operand?

Definition AMDGPUBaseInfo.cpp:2748

std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)

Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.

Definition AMDGPUBaseInfo.cpp:1603

unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)

Definition AMDGPUBaseInfo.cpp:258

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:1917

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)

Definition AMDGPUBaseInfo.cpp:1870

bool isGFX10Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2514

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:1948

static bool isValidRegPrefix(char C)

Definition AMDGPUBaseInfo.cpp:1571

std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)

Definition AMDGPUBaseInfo.cpp:3278

bool isGlobalSegment(const GlobalValue *GV)

Definition AMDGPUBaseInfo.cpp:1557

int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)

Definition AMDGPUBaseInfo.cpp:3158

@ OPERAND_KIMM32

Operand with 32-bit immediate that uses the constant bus.

@ OPERAND_REG_INLINE_C_LAST

@ OPERAND_REG_INLINE_C_FP64

@ OPERAND_REG_INLINE_C_BF16

@ OPERAND_REG_INLINE_C_V2BF16

@ OPERAND_REG_IMM_V2INT16

@ OPERAND_REG_IMM_INT32

Operands with register, 32-bit, or 64-bit immediate.

@ OPERAND_REG_INLINE_AC_FIRST

@ OPERAND_REG_IMM_NOINLINE_V2FP16

@ OPERAND_REG_INLINE_C_V2FP16

@ OPERAND_REG_INLINE_AC_INT32

Operands with an AccVGPR register or inline constant.

@ OPERAND_REG_INLINE_AC_FP32

@ OPERAND_REG_IMM_V2INT32

@ OPERAND_REG_INLINE_C_FIRST

@ OPERAND_REG_INLINE_C_FP32

@ OPERAND_REG_INLINE_AC_LAST

@ OPERAND_REG_INLINE_C_INT32

@ OPERAND_REG_INLINE_C_V2INT16

@ OPERAND_REG_INLINE_AC_FP64

@ OPERAND_REG_INLINE_C_FP16

@ OPERAND_INLINE_SPLIT_BARRIER_INT32

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)

Definition AMDGPUBaseInfo.cpp:1519

bool isNotGFX9Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2504

bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:3574

bool hasGDS(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2447

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)

Definition AMDGPUBaseInfo.cpp:3245

bool isGFX9Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2500

bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2594

const int OPR_ID_DUPLICATE

bool isVOPD(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:660

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)

Definition AMDGPUBaseInfo.cpp:996

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)

Definition AMDGPUBaseInfo.cpp:1802

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition AMDGPUBaseInfo.cpp:1777

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:702

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)

Definition AMDGPUBaseInfo.cpp:1850

std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3079

unsigned getRegBitWidth(const TargetRegisterClass &RC)

Get the size in bits of a register from the register class RC.

static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)

Definition AMDGPUBaseInfo.cpp:1900

bool isGFX1250(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2534

int getMCOpcode(uint16_t Opcode, unsigned Gen)

Definition AMDGPUBaseInfo.cpp:798

const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:310

bool isVI(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2476

bool isTensorStore(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:748

bool getMUBUFIsBufferInv(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:527

bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)

Definition AMDGPUBaseInfo.cpp:3517

MCRegister mc2PseudoReg(MCRegister Reg)

Convert hardware register Reg to a pseudo register.

Definition AMDGPUBaseInfo.cpp:2708

std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)

Definition AMDGPUBaseInfo.cpp:3085

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:1972

unsigned hasKernargPreload(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2598

bool supportsWGP(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2538

bool isMAC(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:664

bool isCI(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2472

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)

Definition AMDGPUBaseInfo.cpp:1817

bool getVOP2IsSingle(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:547

bool getMAIIsDGEMM(unsigned Opc)

Returns true if MAI operation is a double precision GEMM.

Definition AMDGPUBaseInfo.cpp:563

LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)

Definition AMDGPUBaseInfo.cpp:280

SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)

Definition AMDGPUBaseInfo.cpp:1646

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)

Definition AMDGPUBaseInfo.cpp:315

bool isNotGFX12Plus(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2532

bool getMTBUFHasVAddr(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:481

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)

Definition AMDGPUBaseInfo.cpp:1769

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)

Definition AMDGPUBaseInfo.cpp:228

std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)

Definition AMDGPUBaseInfo.cpp:1611

unsigned getLoadcntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1721

bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)

Definition AMDGPUBaseInfo.cpp:3003

bool hasVOPD(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2590

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)

Definition AMDGPUBaseInfo.cpp:817

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)

Definition AMDGPUBaseInfo.cpp:1882

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)

Is this literal inlinable.

Definition AMDGPUBaseInfo.cpp:2943

const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)

Definition AMDGPUBaseInfo.cpp:594

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)

Definition AMDGPUBaseInfo.cpp:245

bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2488

bool isGFX9_GFX10(const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:2484

int getMUBUFElements(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:507

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)

Definition AMDGPUBaseInfo.cpp:1888

const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)

Definition AMDGPUBaseInfo.cpp:3356

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:790

bool isPermlane16(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:691

bool getMUBUFHasSrsrc(unsigned Opc)

Definition AMDGPUBaseInfo.cpp:517

unsigned getDscntBitMask(const IsaVersion &Version)

Definition AMDGPUBaseInfo.cpp:1741

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)

Definition AMDGPUBaseInfo.cpp:3536

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_CS_ChainPreserve

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ SPIR_KERNEL

Used for SPIR kernel functions.

@ AMDGPU_ES

Used for AMDPAL shader stage before geometry shader if geometry is in use.

@ AMDGPU_LS

Used for AMDPAL vertex shader if tessellation is in use.

@ C

The default llvm calling convention, compatible with C.

@ ELFABIVERSION_AMDGPU_HSA_V4

@ ELFABIVERSION_AMDGPU_HSA_V5

@ ELFABIVERSION_AMDGPU_HSA_V6

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)

Extract a Value from Metadata, allowing null.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

This is an optimization pass for GlobalISel generic memory operations.

@ Low

Lower the current thread's priority such that it does not affect foreground tasks significantly.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

constexpr bool isInt(int64_t x)

Checks if an integer fits into the given bit width.

testing::Matcher< const detail::ErrorHolder & > Failed()

constexpr T alignDown(U Value, V Align, W Skew=0)

Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.

std::string utostr(uint64_t X, bool isNeg=false)

FunctionAddr VTableAddr uintptr_t uintptr_t Version

constexpr uint32_t Hi_32(uint64_t Value)

Return the high 32 bits of a 64 bit value.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

constexpr bool isUInt(uint64_t x)

Checks if an unsigned integer fits into the given bit width.

constexpr uint32_t Lo_32(uint64_t Value)

Return the low 32 bits of a 64 bit value.

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

constexpr T divideCeil(U Numerator, V Denominator)

Returns the integer ceil(Numerator / Denominator).

To bit_cast(const From &from) noexcept

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

@ AlwaysUniform

The result values are always uniform.

@ Default

The result values are uniform if and only if all operands are uniform.

AMD Kernel Code Object (amd_kernel_code_t).

uint16_t amd_machine_version_major

uint16_t amd_machine_kind

uint16_t amd_machine_version_stepping

uint8_t private_segment_alignment

int64_t kernel_code_entry_byte_offset

uint32_t amd_kernel_code_version_major

uint16_t amd_machine_version_minor

uint8_t group_segment_alignment

uint8_t kernarg_segment_alignment

uint32_t amd_kernel_code_version_minor

uint64_t compute_pgm_resource_registers

Definition AMDGPUBaseInfo.cpp:416

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:417

bool IsDPMACCInstruction

Definition AMDGPUBaseInfo.cpp:418

unsigned Tgt

Definition AMDGPUBaseInfo.cpp:2126

StringLiteral Name

Definition AMDGPUBaseInfo.cpp:2125

unsigned MaxIndex

Definition AMDGPUBaseInfo.cpp:2127

Definition AMDGPUBaseInfo.cpp:421

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:422

bool HasFP8DstByteSel

Definition AMDGPUBaseInfo.cpp:423

bool HasFP4DstByteSel

Definition AMDGPUBaseInfo.cpp:424

Instruction set architecture version.

Definition AMDGPUBaseInfo.cpp:362

uint16_t BaseOpcode

Definition AMDGPUBaseInfo.cpp:364

bool has_srsrc

Definition AMDGPUBaseInfo.cpp:367

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:363

bool has_vaddr

Definition AMDGPUBaseInfo.cpp:366

uint8_t elements

Definition AMDGPUBaseInfo.cpp:365

bool has_soffset

Definition AMDGPUBaseInfo.cpp:368

Definition AMDGPUBaseInfo.cpp:351

bool IsBufferInv

Definition AMDGPUBaseInfo.cpp:358

bool has_srsrc

Definition AMDGPUBaseInfo.cpp:356

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:352

uint8_t elements

Definition AMDGPUBaseInfo.cpp:354

bool tfe

Definition AMDGPUBaseInfo.cpp:359

bool has_soffset

Definition AMDGPUBaseInfo.cpp:357

bool has_vaddr

Definition AMDGPUBaseInfo.cpp:355

uint16_t BaseOpcode

Definition AMDGPUBaseInfo.cpp:353

Definition AMDGPUBaseInfo.cpp:371

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:372

bool IsBuffer

Definition AMDGPUBaseInfo.cpp:373

Definition AMDGPUBaseInfo.cpp:389

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:390

Definition AMDGPUBaseInfo.cpp:381

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:382

Definition AMDGPUBaseInfo.cpp:385

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:386

Definition AMDGPUBaseInfo.cpp:393

uint16_t VOPDOp

Definition AMDGPUBaseInfo.cpp:395

uint16_t BaseVOP

Definition AMDGPUBaseInfo.cpp:394

bool CanBeVOPD3X

Definition AMDGPUBaseInfo.cpp:397

bool CanBeVOPDX

Definition AMDGPUBaseInfo.cpp:396

Definition AMDGPUBaseInfo.cpp:400

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:401

bool VOPD3

Definition AMDGPUBaseInfo.cpp:405

uint16_t OpX

Definition AMDGPUBaseInfo.cpp:402

uint16_t Subtarget

Definition AMDGPUBaseInfo.cpp:404

uint16_t OpY

Definition AMDGPUBaseInfo.cpp:403

Definition AMDGPUBaseInfo.cpp:376

bool IsSingle

Definition AMDGPUBaseInfo.cpp:378

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:377

Definition AMDGPUBaseInfo.cpp:408

bool IsTrue16

Definition AMDGPUBaseInfo.cpp:410

uint16_t Opcode

Definition AMDGPUBaseInfo.cpp:409

Represents the counter values to wait for in an s_waitcnt instruction.