LLVM: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Source File
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
35
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));
41
namespace {

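// Generic helpers for building, inserting and extracting bit fields; the
// waitcnt and depctr encode/decode routines later in this file are built on
// these.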
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
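// For example, packBits(/*Src=*/3, /*Dst=*/0, /*Shift=*/4, /*Width=*/2)
// yields 0x30, and unpackBits(0x30, 4, 2) recovers 3.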
63
64
65unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
67}
68
69
70unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
72}
73
74
75unsigned getExpcntBitShift(unsigned VersionMajor) {
77}
78
79
80unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
81
82
83unsigned getLgkmcntBitShift(unsigned VersionMajor) {
85}
86
87
88unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
90}
91
92
93unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
94
95
96unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
97 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
98}
99
100
101unsigned getLoadcntBitWidth(unsigned VersionMajor) {
103}
104
105
106unsigned getSamplecntBitWidth(unsigned VersionMajor) {
108}
109
110
111unsigned getBvhcntBitWidth(unsigned VersionMajor) {
113}
114
115
116unsigned getDscntBitWidth(unsigned VersionMajor) {
118}
119
120
121unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
122
123
124unsigned getStorecntBitWidth(unsigned VersionMajor) {
126}
127
128
129unsigned getKmcntBitWidth(unsigned VersionMajor) {
131}
132
133
134unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
136}
137
138
139unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
141}
142
143
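// Bit positions and widths of the fields of the s_waitcnt_depctr immediate;
// the encode/decode helpers in the DepCtr namespace below are expressed in
// terms of these.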
inline unsigned getVaSdstBitWidth() { return 3; }

inline unsigned getVaSdstBitShift() { return 9; }

inline unsigned getVmVsrcBitWidth() { return 3; }

inline unsigned getVmVsrcBitShift() { return 2; }

inline unsigned getVaVdstBitWidth() { return 4; }

inline unsigned getVaVdstBitShift() { return 12; }

inline unsigned getVaVccBitWidth() { return 1; }

inline unsigned getVaVccBitShift() { return 1; }

inline unsigned getSaSdstBitWidth() { return 1; }

inline unsigned getSaSdstBitShift() { return 0; }

inline unsigned getVaSsrcBitWidth() { return 1; }

inline unsigned getVaSsrcBitShift() { return 8; }

inline unsigned getHoldCntWidth() { return 1; }

inline unsigned getHoldCntBitShift() { return 7; }

}
186
namespace llvm {
188
190
191
192
196
197
201
204 M.getModuleFlag("amdhsa_code_object_version"))) {
205 return (unsigned)Ver->getZExtValue() / 100;
206 }
207
209}
210
214
216 switch (ABIVersion) {
218 return 4;
220 return 5;
222 return 6;
223 default:
225 }
226}
227
230 return 0;
231
232 switch (CodeObjectVersion) {
233 case 4:
235 case 5:
237 case 6:
239 default:
241 Twine(CodeObjectVersion));
242 }
243}
244
246 switch (CodeObjectVersion) {
248 return 48;
251 default:
253 }
254}
255
256
257
259 switch (CodeObjectVersion) {
261 return 24;
264 default:
266 }
267}
268
270 switch (CodeObjectVersion) {
272 return 32;
275 default:
277 }
278}
279
281 switch (CodeObjectVersion) {
283 return 40;
286 default:
288 }
289}
290
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#define GET_WMMAInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
302
304 unsigned VDataDwords, unsigned VAddrDwords) {
306 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
307 return Info ? Info->Opcode : -1;
308}
309
314
320 return NewInfo ? NewInfo->Opcode : -1;
321}
322
325 bool IsG16Supported) {
326 unsigned AddrWords = BaseOpcode->NumExtraArgs;
329 if (IsA16)
330 AddrWords += divideCeil(AddrComponents, 2);
331 else
332 AddrWords += AddrComponents;
333
334
335
336
337
338
340 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
341
342
343
345 else
347 }
348 return AddrWords;
349}
350
361
370
375
380
384
388
392
399
407
412
#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL
415
420
426
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"
464
467 return Info ? Info->BaseOpcode : -1;
468}
469
472 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
473 return Info ? Info->Opcode : -1;
474}
475
478 return Info ? Info->elements : 0;
479}
480
483 return Info && Info->has_vaddr;
484}
485
488 return Info && Info->has_srsrc;
489}
490
493 return Info && Info->has_soffset;
494}
495
498 return Info ? Info->BaseOpcode : -1;
499}
500
503 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
504 return Info ? Info->Opcode : -1;
505}
506
509 return Info ? Info->elements : 0;
510}
511
514 return Info && Info->has_vaddr;
515}
516
519 return Info && Info->has_srsrc;
520}
521
524 return Info && Info->has_soffset;
525}
526
529 return Info && Info->IsBufferInv;
530}
531
536
541
  return !Info || Info->IsSingle;
}

  return !Info || Info->IsSingle;
}

  return !Info || Info->IsSingle;
}
556
558 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
559}
560
562
567
570 return Info && Info->is_gfx940_xdl;
571}
572
575 return Info ? Info->is_wmma_xdl : false;
576}
577
579 switch (EncodingVal) {
582 return 6;
584 return 4;
587 default:
588 return 8;
589 }
590
592}
593
595 unsigned BLGP,
596 unsigned F8F8Opcode) {
599 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
600}
601
603 switch (Fmt) {
606 return 16;
609 return 12;
611 return 8;
612 }
613
615}
616
618 unsigned FmtB,
619 unsigned F8F8Opcode) {
622 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
623}
624
626 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
628 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
630 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
632 llvm_unreachable("Subtarget generation does not support VOPD!");
633}
634
636 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
637 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
640
641
642
643
646 EncodingFamily, VOPD3) != -1;
647 return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
648 }
649
650 return {false, false};
651}
652
654 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
655 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
657 return Info ? Info->VOPDOp : ~0u;
658}
659
663
665 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
666 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
667 Opc == AMDGPU::V_MAC_F32_e64_vi ||
668 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
669 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
670 Opc == AMDGPU::V_MAC_F16_e64_vi ||
671 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
672 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
673 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
674 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
675 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
676 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
677 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
678 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
679 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
680 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
681 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
682 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
683 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
684 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
685 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
686 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
687 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
688 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
689}
690
692 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
693 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
694 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
695 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
696 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
697 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
698 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
699 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
700}
701
703 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
704 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
705 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
706 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
707 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
708 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
709 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
710 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
711 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
712 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
713}
714
716 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
717 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
718 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
719 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
720 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
721 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
722 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
723 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
724 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
725 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
726 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
727 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
728 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
729 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
730 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
731 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
732 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
733 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
734 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
735}
736
738 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
739 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
740 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
741 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
742 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
743 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
744 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
745 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
746}
747
749 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
750 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
751}
752
767
772
777 if (Info->HasFP8DstByteSel)
779 if (Info->HasFP4DstByteSel)
781
783}
784
787 return Info ? Info->Opcode3Addr : ~0u;
788}
789
792 return Info ? Info->Opcode2Addr : ~0u;
793}
794
795
796
797
799 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
800}
801
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
816
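// Map a pair of component opcodes (OpX, OpY) to the fused VOPD/VOPD3 opcode
// for the given encoding family, or return -1 if the pair cannot be fused.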
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}
825
827 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
829 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
830 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
832 return {OpX->BaseVOP, OpY->BaseVOP};
833}
834
836
839
844 HasSrc2Acc = TiedIdx != -1;
846
851 : 1;
853
854 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
855 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
856
857
858 NumVOPD3Mods = 2;
859 if (IsVOP3)
860 SrcOperandsNum = 3;
862 getNamedOperandIdx(Opcode, OpName::src0))) {
863
864
865 NumVOPD3Mods = SrcOperandsNum;
866 if (HasSrc2Acc)
867 --NumVOPD3Mods;
868 }
869
871 return;
872
874 unsigned CompOprIdx;
875 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
877 MandatoryLiteralIdx = CompOprIdx;
878 break;
879 }
880 }
881}
882
884 return getNamedOperandIdx(Opcode, OpName::bitop3);
885}
886
889
892
896
897
898 return 0;
899}
900
902 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
904 bool VOPD3) const {
905
910
912 unsigned BanksMask) -> bool {
915 if (!BaseX)
916 BaseX = X;
917 if (!BaseY)
918 BaseY = Y;
919 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
920 return true;
921 if (BaseX != X &&
922 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
923 return true;
924 if (BaseY != Y &&
925 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
926 return true;
927
928
929
930 return false;
931 };
932
933 unsigned CompOprIdx;
937 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
938 continue;
939
942 return CompOprIdx;
943
945 continue;
946
948
949
950 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
951 return CompOprIdx;
952 if (VOPD3)
953 continue;
954 }
955
956 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
958 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
959 return CompOprIdx;
960 }
961
962 return {};
963}
964
965
966
967
968
969
970
971
973InstInfo::getRegIndices(unsigned CompIdx,
974 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
975 bool VOPD3) const {
977
978 const auto &Comp = CompInfo[CompIdx];
980
981 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
982
983 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
984 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
986 Comp.hasRegSrcOperand(CompSrcIdx)
987 ? GetRegIdx(CompIdx,
988 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
990 }
992}
993
994}
995
999
1003 const auto &OpXDesc = InstrInfo->get(OpX);
1004 const auto &OpYDesc = InstrInfo->get(OpY);
1009}
1010
1011namespace IsaInfo {
1012
1016 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1018 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1020}
1021
1023
1024
1025
1027 std::optional XnackRequested;
1028 std::optional SramEccRequested;
1029
1030 for (const std::string &Feature : Features.getFeatures()) {
1031 if (Feature == "+xnack")
1032 XnackRequested = true;
1033 else if (Feature == "-xnack")
1034 XnackRequested = false;
1035 else if (Feature == "+sramecc")
1036 SramEccRequested = true;
1037 else if (Feature == "-sramecc")
1038 SramEccRequested = false;
1039 }
1040
1043
1044 if (XnackRequested) {
1045 if (XnackSupported) {
1046 XnackSetting =
1048 } else {
1049
1050
1051 if (*XnackRequested) {
1052 errs() << "warning: xnack 'On' was requested for a processor that does "
1053 "not support it!\n";
1054 } else {
1055 errs() << "warning: xnack 'Off' was requested for a processor that "
1056 "does not support it!\n";
1057 }
1058 }
1059 }
1060
1061 if (SramEccRequested) {
1062 if (SramEccSupported) {
1063 SramEccSetting =
1065 } else {
1066
1067
1068
1069 if (*SramEccRequested) {
1070 errs() << "warning: sramecc 'On' was requested for a processor that "
1071 "does not support it!\n";
1072 } else {
1073 errs() << "warning: sramecc 'Off' was requested for a processor that "
1074 "does not support it!\n";
1075 }
1076 }
1077 }
1078}
1079
1089
1092 TargetID.split(TargetIDSplit, ':');
1093
1094 for (const auto &FeatureString : TargetIDSplit) {
1095 if (FeatureString.starts_with("xnack"))
1097 if (FeatureString.starts_with("sramecc"))
1099 }
1100}
1101
1103 std::string StringRep;
1105
1106 auto TargetTriple = STI.getTargetTriple();
1108
1109 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1110 << '-' << TargetTriple.getOSName() << '-'
1111 << TargetTriple.getEnvironmentName() << '-';
1112
1113 std::string Processor;
1114
1115
1116
1118 Processor = STI.getCPU().str();
1119 else
1122 .str();
1123
1124 std::string Features;
1125 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
1126
1128 Features += ":sramecc-";
1130 Features += ":sramecc+";
1131
1133 Features += ":xnack-";
1135 Features += ":xnack+";
1136 }
1137
1138 StreamRep << Processor << Features;
1139
1140 return StringRep;
1141}
1142
1145 return 16;
1147 return 32;
1148
1149 return 64;
1150}
1151
1154
1155
1156
1157
1159 BytesPerCU *= 2;
1160
1161 return BytesPerCU;
1162}
1163
1165 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1166 return 32768;
1167 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1168 return 65536;
1169 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1170 return 163840;
1171 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1172 return 327680;
1173 return 32768;
1174}
1175
1177
1178
1179
1180
1183 return 4;
1184 }
1185
1186
1187
1189 return 2;
1190
1191
1192
1193 return 4;
1194}
1195
1197 unsigned FlatWorkGroupSize) {
1198 assert(FlatWorkGroupSize != 0);
1200 return 8;
1203 if (N == 1) {
1204
1205 return MaxWaves;
1206 }
1207
1208 unsigned MaxBarriers = 16;
1210 MaxBarriers = 32;
1211
1212 return std::min(MaxWaves / N, MaxBarriers);
1213}
1214
1216
1218
1220 return 8;
1222 return 10;
1224}
1225
1227 unsigned FlatWorkGroupSize) {
1230}
1231
1233
1235
1236 return 1024;
1237}
1238
1240 unsigned FlatWorkGroupSize) {
1242}
1243
1246 if (Version.Major >= 10)
1249 return 16;
1250 return 8;
1251}
1252
1254
1258 return 800;
1259 return 512;
1260}
1261
1265
1267 if (Version.Major >= 10)
1268 return 106;
1270 return 102;
1271 return 104;
1272}
1273
1275 assert(WavesPerEU != 0);
1276
1278 if (Version.Major >= 10)
1279 return 0;
1280
1282 return 0;
1283
1284 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1286 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1289}
1290
1292 bool Addressable) {
1293 assert(WavesPerEU != 0);
1294
1297 if (Version.Major >= 10)
1298 return Addressable ? AddressableNumSGPRs : 108;
1299 if (Version.Major >= 8 && !Addressable)
1300 AddressableNumSGPRs = 112;
1303 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1305 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1306}
1307
1309 bool FlatScrUsed, bool XNACKUsed) {
1310 unsigned ExtraSGPRs = 0;
1311 if (VCCUsed)
1312 ExtraSGPRs = 2;
1313
1315 if (Version.Major >= 10)
1316 return ExtraSGPRs;
1317
1318 if (Version.Major < 8) {
1319 if (FlatScrUsed)
1320 ExtraSGPRs = 4;
1321 } else {
1322 if (XNACKUsed)
1323 ExtraSGPRs = 4;
1324
1325 if (FlatScrUsed ||
1327 ExtraSGPRs = 6;
1328 }
1329
1330 return ExtraSGPRs;
1331}
1332
1334 bool FlatScrUsed) {
1337}
1338
1340 unsigned Granule) {
1341 return divideCeil(std::max(1u, NumRegs), Granule);
1342}
1343
1349
1351 unsigned DynamicVGPRBlockSize,
1352 std::optional EnableWavefrontSize32) {
1354 return 8;
1355
1356 if (DynamicVGPRBlockSize != 0)
1357 return DynamicVGPRBlockSize;
1358
1359 bool IsWave32 = EnableWavefrontSize32
1360 ? *EnableWavefrontSize32
1362
1364 return IsWave32 ? 24 : 12;
1365
1367 return IsWave32 ? 16 : 8;
1368
1369 return IsWave32 ? 8 : 4;
1370}
1371
1373 std::optional EnableWavefrontSize32) {
1375 return 8;
1376
1377 bool IsWave32 = EnableWavefrontSize32
1378 ? *EnableWavefrontSize32
1380
1382 return IsWave32 ? 16 : 8;
1383
1384 return IsWave32 ? 8 : 4;
1385}
1386
1388
1391 return 512;
1393 return 256;
1396 return IsWave32 ? 1536 : 768;
1397 return IsWave32 ? 1024 : 512;
1398}
1399
1402 if (Features.test(Feature1024AddressableVGPRs))
1403 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1404 return 256;
1405}
1406
1408 unsigned DynamicVGPRBlockSize) {
1410 if (Features.test(FeatureGFX90AInsts))
1411 return 512;
1412
1413 if (DynamicVGPRBlockSize != 0)
1414
1417}
1418
1420 unsigned NumVGPRs,
1421 unsigned DynamicVGPRBlockSize) {
1425}
1426
1428 unsigned MaxWaves,
1429 unsigned TotalNumVGPRs) {
1430 if (NumVGPRs < Granule)
1431 return MaxWaves;
1432 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1433 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1434}
1435
1439 return MaxWaves;
1440
1442 if (SGPRs <= 80)
1443 return 10;
1444 if (SGPRs <= 88)
1445 return 9;
1446 if (SGPRs <= 100)
1447 return 8;
1448 return 7;
1449 }
1450 if (SGPRs <= 48)
1451 return 10;
1452 if (SGPRs <= 56)
1453 return 9;
1454 if (SGPRs <= 64)
1455 return 8;
1456 if (SGPRs <= 72)
1457 return 7;
1458 if (SGPRs <= 80)
1459 return 6;
1460 return 5;
1461}
1462
1464 unsigned DynamicVGPRBlockSize) {
1465 assert(WavesPerEU != 0);
1466
1468 if (WavesPerEU >= MaxWavesPerEU)
1469 return 0;
1470
1472 unsigned AddrsableNumVGPRs =
1475 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1476
1477 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1478 return 0;
1479
1481 DynamicVGPRBlockSize);
1482 if (WavesPerEU < MinWavesPerEU)
1483 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1484
1485 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1486 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1487 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1488}
1489
1491 unsigned DynamicVGPRBlockSize) {
1492 assert(WavesPerEU != 0);
1493
1494 unsigned MaxNumVGPRs =
1497 unsigned AddressableNumVGPRs =
1499 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1500}
1501
1503 std::optional EnableWavefrontSize32) {
1506 1;
1507}
1508
1510 unsigned NumVGPRs,
1511 unsigned DynamicVGPRBlockSize,
1512 std::optional EnableWavefrontSize32) {
1514 NumVGPRs,
1516}
1517}
1518
1532 } else {
1534 }
1535
1536
1537
1539
1540
1541
1545
1546 if (Version.Major >= 10) {
1550 }
1551}
1552
1556
1560
1566
1570
1572 return C == 'v' || C == 's' || C == 'a';
1573}
1574
1576 char Kind = RegName.front();
1578 return {};
1579
1581 if (RegName.consume_front("[")) {
1582 unsigned Idx, End;
1588 unsigned NumRegs = End - Idx + 1;
1589 if (NumRegs > 1)
1590 return {Kind, Idx, NumRegs};
1591 }
1592 } else {
1593 unsigned Idx;
1596 return {Kind, Idx, 1};
1597 }
1598
1599 return {};
1600}
1601
1602std::tuple<char, unsigned, unsigned>
1605 if (.consume_front("{") ||
.consume_back("}"))
1606 return {};
1608}
1609
1610std::pair<unsigned, unsigned>
1612 std::pair<unsigned, unsigned> Default,
1613 bool OnlyFirstRequired) {
1615 return {Attr->first, Attr->second.value_or(Default.second)};
1617}
1618
std::optional<std::pair<unsigned, std::optional<unsigned>>>
                            bool OnlyFirstRequired) {
  if (!A.isStringAttribute())
    return std::nullopt;

  std::pair<unsigned, std::optional<unsigned>> Ints;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1629 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1630 Ctx.emitError("can't parse first integer attribute " + Name);
1631 return std::nullopt;
1632 }
1633 unsigned Second = 0;
1634 if (Strs.second.trim().getAsInteger(0, Second)) {
1635 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1636 Ctx.emitError("can't parse second integer attribute " + Name);
1637 return std::nullopt;
1638 }
1639 } else {
1640 Ints.second = Second;
1641 }
1642
1643 return Ints;
1644}
1645
1647 unsigned Size,
1649 std::optional<SmallVector> R =
1652}
1653
1654std::optional<SmallVector>
1658
1660 if (.isValid())
1661 return std::nullopt;
1662 if (.isStringAttribute()) {
1663 Ctx.emitError(Name + " is not a string attribute");
1664 return std::nullopt;
1665 }
1666
1668
1670 unsigned i = 0;
1671 for (; !S.empty() && i < Size; i++) {
1672 std::pair<StringRef, StringRef> Strs = S.split(',');
1673 unsigned IntVal;
1674 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1675 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1676 Name);
1677 return std::nullopt;
1678 }
1679 Vals[i] = IntVal;
1680 S = Strs.second;
1681 }
1682
1684 Ctx.emitError("attribute " + Name +
1685 " has incorrect number of integers; expected " +
1687 return std::nullopt;
1688 }
1689 return Vals;
1690}
1691
1695 auto Low =
1699
1700
1701
1702
1704 if (Low.ule(Val) && High.ugt(Val))
1705 return true;
1706 } else {
1707 if (Low.uge(Val) && High.ult(Val))
1708 return true;
1709 }
1710 }
1711
1712 return false;
1713}
1714
1717 if (Wait.LoadCnt != ~0u)
1718 OS << LS << "LoadCnt: " << Wait.LoadCnt;
1719 if (Wait.ExpCnt != ~0u)
1720 OS << LS << "ExpCnt: " << Wait.ExpCnt;
1721 if (Wait.DsCnt != ~0u)
1722 OS << LS << "DsCnt: " << Wait.DsCnt;
1723 if (Wait.StoreCnt != ~0u)
1724 OS << LS << "StoreCnt: " << Wait.StoreCnt;
1725 if (Wait.SampleCnt != ~0u)
1726 OS << LS << "SampleCnt: " << Wait.SampleCnt;
1727 if (Wait.BvhCnt != ~0u)
1728 OS << LS << "BvhCnt: " << Wait.BvhCnt;
1729 if (Wait.KmCnt != ~0u)
1730 OS << LS << "KmCnt: " << Wait.KmCnt;
1731 if (Wait.XCnt != ~0u)
1732 OS << LS << "XCnt: " << Wait.XCnt;
1733 if (LS.unused())
1734 OS << "none";
1735 OS << '\n';
1736 return OS;
1737}
1738
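// Maximum encodable value of each wait counter, derived from the bit width of
// the corresponding field for the given ISA version.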
1740 return (1 << (getVmcntBitWidthLo(Version.Major) +
1741 getVmcntBitWidthHi(Version.Major))) -
1742 1;
1743}
1744
1746 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1747}
1748
1750 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1751}
1752
1754 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1755}
1756
1758 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1759}
1760
1762 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1763}
1764
1766 return (1 << getDscntBitWidth(Version.Major)) - 1;
1767}
1768
1770 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1771}
1772
1774 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1775}
1776
1778 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1779}
1780
1782 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1783 getVmcntBitWidthLo(Version.Major));
1784 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1785 getExpcntBitWidth(Version.Major));
1786 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1787 getLgkmcntBitWidth(Version.Major));
1788 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1789 getVmcntBitWidthHi(Version.Major));
1790 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1791}
1792
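// On targets where vmcnt is split into low and high fields (GFX9/GFX10), the
// two parts are extracted separately and rejoined into a single counter value.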
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}
1800
1802 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1803 getExpcntBitWidth(Version.Major));
1804}
1805
1807 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1808 getLgkmcntBitWidth(Version.Major));
1809}
1810
1812 unsigned &Expcnt, unsigned &Lgkmcnt) {
1816}
1817
1823 return Decoded;
1824}
1825
1827 unsigned Vmcnt) {
1829 getVmcntBitWidthLo(Version.Major));
1830 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1831 getVmcntBitShiftHi(Version.Major),
1832 getVmcntBitWidthHi(Version.Major));
1833}
1834
1836 unsigned Expcnt) {
1837 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1838 getExpcntBitWidth(Version.Major));
1839}
1840
1842 unsigned Lgkmcnt) {
1843 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1844 getLgkmcntBitWidth(Version.Major));
1845}
1846
1848 unsigned Expcnt, unsigned Lgkmcnt) {
1854}
1855
1859
1861 bool IsStore) {
1862 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1863 getDscntBitWidth(Version.Major));
1864 if (IsStore) {
1865 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1866 getStorecntBitWidth(Version.Major));
1867 return Dscnt | Storecnt;
1868 }
1869 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1870 getLoadcntBitWidth(Version.Major));
1871 return Dscnt | Loadcnt;
1872}
1873
1877 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1878 getLoadcntBitWidth(Version.Major));
1879 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1880 getDscntBitWidth(Version.Major));
1881 return Decoded;
1882}
1883
1887 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1888 getStorecntBitWidth(Version.Major));
1889 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1890 getDscntBitWidth(Version.Major));
1891 return Decoded;
1892}
1893
1895 unsigned Loadcnt) {
1896 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1897 getLoadcntBitWidth(Version.Major));
1898}
1899
1901 unsigned Storecnt) {
1902 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1903 getStorecntBitWidth(Version.Major));
1904}
1905
1907 unsigned Dscnt) {
1908 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1909 getDscntBitWidth(Version.Major));
1910}
1911
1913 unsigned Dscnt) {
1918}
1919
1923
1925 unsigned Storecnt, unsigned Dscnt) {
1930}
1931
1933 const Waitcnt &Decoded) {
1935}
1936
1937
1938
1939
1940
1944 unsigned Enc = 0;
1945 for (int Idx = 0; Idx < Size; ++Idx) {
1946 const auto &Op = Opr[Idx];
1947 if (Op.isSupported(STI))
1948 Enc |= Op.encode(Op.Default);
1949 }
1950 return Enc;
1951}
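// Decode a packed custom-operand value: every field supported on this
// subtarget must hold a valid value, no bits outside the supported fields may
// be set, and HasNonDefaultVal reports whether any field differs from its
// default.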
1952
1954 int Size, unsigned Code,
1955 bool &HasNonDefaultVal,
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}
1971
1973 unsigned Code, int &Idx, StringRef &Name,
1974 unsigned &Val, bool &IsDefault,
1976 while (Idx < Size) {
1977 const auto &Op = Opr[Idx++];
1978 if (Op.isSupported(STI)) {
1979 Name = Op.Name;
1980 Val = Op.decode(Code);
1981 IsDefault = (Val == Op.Default);
1982 return true;
1983 }
1984 }
1985
1986 return false;
1987}
1988
1990 int64_t InputVal) {
1991 if (InputVal < 0 || InputVal > Op.Max)
1993 return Op.encode(InputVal);
1994}
1995
1997 const StringRef Name, int64_t InputVal,
1998 unsigned &UsedOprMask,
2001 for (int Idx = 0; Idx < Size; ++Idx) {
2002 const auto &Op = Opr[Idx];
2003 if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
2006 continue;
2007 }
2008 auto OprMask = Op.getMask();
2009 if (OprMask & UsedOprMask)
2011 UsedOprMask |= OprMask;
2013 }
2014 }
2015 return InvalidId;
2016}
2017
2018
2019
2020
2021
2022namespace DepCtr {
2023
2025 static int Default = -1;
2029}
2030
2034 HasNonDefaultVal, STI);
2035}
2036
2040 IsDefault, STI);
2041}
2042
2048
2050 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2051}
2052
2054 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2055}
2056
2058 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2059}
2060
2062 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2063}
2064
2066 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2067}
2068
2070 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2071}
2072
2074 return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
2075}
2076
2078 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2079}
2080
2085
2087 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2088}
2089
2094
2096 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2097}
2098
2103
2105 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2106}
2107
2112
2114 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2115}
2116
2121
2123 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2124}
2125
2130
2132 return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
2133}
2134
2139
2140}
2141
2142
2143
2144
2145
2146namespace Exp {
2147
2153
2154
2164
2165
2168 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2169 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2170 Name = Val.Name;
2171 return true;
2172 }
2173 }
2174 return false;
2175}
2176
2178
2180 if (Val.MaxIndex == 0 && Name == Val.Name)
2181 return Val.Tgt;
2182
2183 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2184 StringRef Suffix = Name.drop_front(Val.Name.size());
2185
2186 unsigned Id;
2187 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2189
2190
2191 if (Suffix.size() > 1 && Suffix[0] == '0')
2193
2194 return Val.Tgt + Id;
2195 }
2196 }
2198}
2199
2201 switch (Id) {
2210 default:
2213 return true;
2214 }
2215}
2216
2217}
2218
2219
2220
2221
2222
2223namespace MTBUFFormat {
2224
2228 return Id;
2229 }
2231}
2232
2237
2245
2249 if (Name == lookupTable[Id])
2250 return Id;
2251 }
2253}
2254
2259
2261 unsigned Dfmt;
2262 unsigned Nfmt;
2265}
2266
2270
2274
2279
2284 return Id;
2285 }
2286 } else {
2289 return Id;
2290 }
2291 }
2293}
2294
2300
2304
2311 return Id;
2312 }
2313 } else {
2316 return Id;
2317 }
2318 }
2320}
2321
2325
2331
2332}
2333
2334
2335
2336
2337
2338namespace SendMsg {
2339
2343
2347
2349 bool Strict) {
2351
2352 if (!Strict)
2354
2357 return false;
2358
2360 }
2361
2363}
2364
2368
2369 if (!Strict)
2371
2373 switch (MsgId) {
2380 }
2381 }
2383}
2384
2390
2397
2402 OpId = 0;
2404 } else {
2407 }
2408}
2409
2413
2414}
2415
2416
2417
2418
2419
2421 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2422}
2423
2425
2426 return F.getFnAttributeAsParsedInteger(
2427 "amdgpu-color-export",
2429}
2430
2432 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2433}
2434
2437 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2438
2441
2442 return 0;
2443}
2444
2446 return STI.hasFeature(AMDGPU::FeatureXNACK);
2447}
2448
2450 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2451}
2452
2454 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2455 !STI.hasFeature(AMDGPU::FeatureR128A16);
2456}
2457
2459 return STI.hasFeature(AMDGPU::FeatureA16);
2460}
2461
2463 return STI.hasFeature(AMDGPU::FeatureG16);
2464}
2465
2467 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && (STI) &&
2469}
2470
2472 return STI.hasFeature(AMDGPU::FeatureGDS);
2473}
2474
2477 if (Version.Major == 10)
2478 return Version.Minor >= 3 ? 13 : 5;
2479 if (Version.Major == 11)
2480 return 5;
2481 if (Version.Major >= 12)
2482 return HasSampler ? 4 : 5;
2483 return 0;
2484}
2485
2488 return 32;
2489 return 16;
2490}
2491
2493 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2494}
2495
2497 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2498}
2499
2501 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2502}
2503
2505 return STI.hasFeature(AMDGPU::FeatureGFX9);
2506}
2507
2511
2515
2519
2523
2527
2529
2531 return STI.hasFeature(AMDGPU::FeatureGFX10);
2532}
2533
2537
2541
2543 return STI.hasFeature(AMDGPU::FeatureGFX11);
2544}
2545
2549
2553
2555
2557
2561
2567
2569
2573
2577
2579 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2580}
2581
2583 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2584}
2585
2587 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2588}
2589
2591 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2592}
2593
2597
2599 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2600}
2601
2603 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2604}
2605
2607 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2608}
2609
2611 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2612}
2613
2615 return STI.hasFeature(AMDGPU::FeatureVOPD);
2616}
2617
2619 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2620}
2621
2623 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2624}
2625
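// On gfx90a and newer, AGPRs and VGPRs share one register budget, so the AGPR
// count is added on top of the VGPR count (rounded up to a multiple of 4);
// otherwise the two files are separate and the larger count is used.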
                            int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
2632
2634 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2635 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2636 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2637 Reg == AMDGPU::SCC;
2638}
2639
2643
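// MAP_REG2REG expands to a switch mapping generation-independent register
// enums (FLAT_SCR, TTMPs, M0, SGPR_NULL, ...) to the variant used by a
// particular subtarget generation. The CASE_* helpers are defined below for
// that direction and then redefined further down for the reverse mapping.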
2644#define MAP_REG2REG \
2645 using namespace AMDGPU; \
2646 switch (Reg.id()) { \
2647 default: \
2648 return Reg; \
2649 CASE_CI_VI(FLAT_SCR) \
2650 CASE_CI_VI(FLAT_SCR_LO) \
2651 CASE_CI_VI(FLAT_SCR_HI) \
2652 CASE_VI_GFX9PLUS(TTMP0) \
2653 CASE_VI_GFX9PLUS(TTMP1) \
2654 CASE_VI_GFX9PLUS(TTMP2) \
2655 CASE_VI_GFX9PLUS(TTMP3) \
2656 CASE_VI_GFX9PLUS(TTMP4) \
2657 CASE_VI_GFX9PLUS(TTMP5) \
2658 CASE_VI_GFX9PLUS(TTMP6) \
2659 CASE_VI_GFX9PLUS(TTMP7) \
2660 CASE_VI_GFX9PLUS(TTMP8) \
2661 CASE_VI_GFX9PLUS(TTMP9) \
2662 CASE_VI_GFX9PLUS(TTMP10) \
2663 CASE_VI_GFX9PLUS(TTMP11) \
2664 CASE_VI_GFX9PLUS(TTMP12) \
2665 CASE_VI_GFX9PLUS(TTMP13) \
2666 CASE_VI_GFX9PLUS(TTMP14) \
2667 CASE_VI_GFX9PLUS(TTMP15) \
2668 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2669 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2670 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2671 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2672 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2673 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2674 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2675 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2676 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2677 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2678 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2679 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2680 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2681 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2682 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2683 CASE_VI_GFX9PLUS( \
2684 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2685 CASE_GFXPRE11_GFX11PLUS(M0) \
2686 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2687 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2688 }
2689
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: \
    return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2706
2712
#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

#define CASE_CI_VI(node) \
  case node##_ci: \
  case node##_vi: \
    return node;
#define CASE_VI_GFX9PLUS(node) \
  case node##_vi: \
  case node##_gfx9plus: \
    return node;
#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node##_gfx11plus: \
  case node##_gfxpre11: \
    return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2731
2733
2735 switch (Reg.id()) {
2736 case AMDGPU::SRC_SHARED_BASE_LO:
2737 case AMDGPU::SRC_SHARED_BASE:
2738 case AMDGPU::SRC_SHARED_LIMIT_LO:
2739 case AMDGPU::SRC_SHARED_LIMIT:
2740 case AMDGPU::SRC_PRIVATE_BASE_LO:
2741 case AMDGPU::SRC_PRIVATE_BASE:
2742 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2743 case AMDGPU::SRC_PRIVATE_LIMIT:
2744 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2745 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2746 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2747 return true;
2748 case AMDGPU::SRC_VCCZ:
2749 case AMDGPU::SRC_EXECZ:
2750 case AMDGPU::SRC_SCC:
2751 return true;
2752 case AMDGPU::SGPR_NULL:
2753 return true;
2754 default:
2755 return false;
2756 }
2757}
2758
2759#undef CASE_CI_VI
2760#undef CASE_VI_GFX9PLUS
2761#undef CASE_GFXPRE11_GFX11PLUS
2762#undef CASE_GFXPRE11_GFX11PLUS_TO
2763#undef MAP_REG2REG
2764
2767 unsigned OpType = Desc.operands()[OpNo].OperandType;
2770}
2771
2774 unsigned OpType = Desc.operands()[OpNo].OperandType;
2775 switch (OpType) {
2788 return true;
2789 default:
2790 return false;
2791 }
2792}
2793
2796 unsigned OpType = Desc.operands()[OpNo].OperandType;
2801}
2802
2803
2804
2806 switch (RCID) {
2807 case AMDGPU::VGPR_16RegClassID:
2808 case AMDGPU::VGPR_16_Lo128RegClassID:
2809 case AMDGPU::SGPR_LO16RegClassID:
2810 case AMDGPU::AGPR_LO16RegClassID:
2811 return 16;
2812 case AMDGPU::SGPR_32RegClassID:
2813 case AMDGPU::VGPR_32RegClassID:
2814 case AMDGPU::VGPR_32_Lo256RegClassID:
2815 case AMDGPU::VRegOrLds_32RegClassID:
2816 case AMDGPU::AGPR_32RegClassID:
2817 case AMDGPU::VS_32RegClassID:
2818 case AMDGPU::AV_32RegClassID:
2819 case AMDGPU::SReg_32RegClassID:
2820 case AMDGPU::SReg_32_XM0RegClassID:
2821 case AMDGPU::SRegOrLds_32RegClassID:
2822 return 32;
2823 case AMDGPU::SGPR_64RegClassID:
2824 case AMDGPU::VS_64RegClassID:
2825 case AMDGPU::SReg_64RegClassID:
2826 case AMDGPU::VReg_64RegClassID:
2827 case AMDGPU::AReg_64RegClassID:
2828 case AMDGPU::SReg_64_XEXECRegClassID:
2829 case AMDGPU::VReg_64_Align2RegClassID:
2830 case AMDGPU::AReg_64_Align2RegClassID:
2831 case AMDGPU::AV_64RegClassID:
2832 case AMDGPU::AV_64_Align2RegClassID:
2833 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2834 case AMDGPU::VS_64_Lo256RegClassID:
2835 return 64;
2836 case AMDGPU::SGPR_96RegClassID:
2837 case AMDGPU::SReg_96RegClassID:
2838 case AMDGPU::VReg_96RegClassID:
2839 case AMDGPU::AReg_96RegClassID:
2840 case AMDGPU::VReg_96_Align2RegClassID:
2841 case AMDGPU::AReg_96_Align2RegClassID:
2842 case AMDGPU::AV_96RegClassID:
2843 case AMDGPU::AV_96_Align2RegClassID:
2844 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2845 return 96;
2846 case AMDGPU::SGPR_128RegClassID:
2847 case AMDGPU::SReg_128RegClassID:
2848 case AMDGPU::VReg_128RegClassID:
2849 case AMDGPU::AReg_128RegClassID:
2850 case AMDGPU::VReg_128_Align2RegClassID:
2851 case AMDGPU::AReg_128_Align2RegClassID:
2852 case AMDGPU::AV_128RegClassID:
2853 case AMDGPU::AV_128_Align2RegClassID:
2854 case AMDGPU::SReg_128_XNULLRegClassID:
2855 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2856 return 128;
2857 case AMDGPU::SGPR_160RegClassID:
2858 case AMDGPU::SReg_160RegClassID:
2859 case AMDGPU::VReg_160RegClassID:
2860 case AMDGPU::AReg_160RegClassID:
2861 case AMDGPU::VReg_160_Align2RegClassID:
2862 case AMDGPU::AReg_160_Align2RegClassID:
2863 case AMDGPU::AV_160RegClassID:
2864 case AMDGPU::AV_160_Align2RegClassID:
2865 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2866 return 160;
2867 case AMDGPU::SGPR_192RegClassID:
2868 case AMDGPU::SReg_192RegClassID:
2869 case AMDGPU::VReg_192RegClassID:
2870 case AMDGPU::AReg_192RegClassID:
2871 case AMDGPU::VReg_192_Align2RegClassID:
2872 case AMDGPU::AReg_192_Align2RegClassID:
2873 case AMDGPU::AV_192RegClassID:
2874 case AMDGPU::AV_192_Align2RegClassID:
2875 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
2876 return 192;
2877 case AMDGPU::SGPR_224RegClassID:
2878 case AMDGPU::SReg_224RegClassID:
2879 case AMDGPU::VReg_224RegClassID:
2880 case AMDGPU::AReg_224RegClassID:
2881 case AMDGPU::VReg_224_Align2RegClassID:
2882 case AMDGPU::AReg_224_Align2RegClassID:
2883 case AMDGPU::AV_224RegClassID:
2884 case AMDGPU::AV_224_Align2RegClassID:
2885 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
2886 return 224;
2887 case AMDGPU::SGPR_256RegClassID:
2888 case AMDGPU::SReg_256RegClassID:
2889 case AMDGPU::VReg_256RegClassID:
2890 case AMDGPU::AReg_256RegClassID:
2891 case AMDGPU::VReg_256_Align2RegClassID:
2892 case AMDGPU::AReg_256_Align2RegClassID:
2893 case AMDGPU::AV_256RegClassID:
2894 case AMDGPU::AV_256_Align2RegClassID:
2895 case AMDGPU::SReg_256_XNULLRegClassID:
2896 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
2897 return 256;
2898 case AMDGPU::SGPR_288RegClassID:
2899 case AMDGPU::SReg_288RegClassID:
2900 case AMDGPU::VReg_288RegClassID:
2901 case AMDGPU::AReg_288RegClassID:
2902 case AMDGPU::VReg_288_Align2RegClassID:
2903 case AMDGPU::AReg_288_Align2RegClassID:
2904 case AMDGPU::AV_288RegClassID:
2905 case AMDGPU::AV_288_Align2RegClassID:
2906 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
2907 return 288;
2908 case AMDGPU::SGPR_320RegClassID:
2909 case AMDGPU::SReg_320RegClassID:
2910 case AMDGPU::VReg_320RegClassID:
2911 case AMDGPU::AReg_320RegClassID:
2912 case AMDGPU::VReg_320_Align2RegClassID:
2913 case AMDGPU::AReg_320_Align2RegClassID:
2914 case AMDGPU::AV_320RegClassID:
2915 case AMDGPU::AV_320_Align2RegClassID:
2916 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
2917 return 320;
2918 case AMDGPU::SGPR_352RegClassID:
2919 case AMDGPU::SReg_352RegClassID:
2920 case AMDGPU::VReg_352RegClassID:
2921 case AMDGPU::AReg_352RegClassID:
2922 case AMDGPU::VReg_352_Align2RegClassID:
2923 case AMDGPU::AReg_352_Align2RegClassID:
2924 case AMDGPU::AV_352RegClassID:
2925 case AMDGPU::AV_352_Align2RegClassID:
2926 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
2927 return 352;
2928 case AMDGPU::SGPR_384RegClassID:
2929 case AMDGPU::SReg_384RegClassID:
2930 case AMDGPU::VReg_384RegClassID:
2931 case AMDGPU::AReg_384RegClassID:
2932 case AMDGPU::VReg_384_Align2RegClassID:
2933 case AMDGPU::AReg_384_Align2RegClassID:
2934 case AMDGPU::AV_384RegClassID:
2935 case AMDGPU::AV_384_Align2RegClassID:
2936 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
2937 return 384;
2938 case AMDGPU::SGPR_512RegClassID:
2939 case AMDGPU::SReg_512RegClassID:
2940 case AMDGPU::VReg_512RegClassID:
2941 case AMDGPU::AReg_512RegClassID:
2942 case AMDGPU::VReg_512_Align2RegClassID:
2943 case AMDGPU::AReg_512_Align2RegClassID:
2944 case AMDGPU::AV_512RegClassID:
2945 case AMDGPU::AV_512_Align2RegClassID:
2946 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
2947 return 512;
2948 case AMDGPU::SGPR_1024RegClassID:
2949 case AMDGPU::SReg_1024RegClassID:
2950 case AMDGPU::VReg_1024RegClassID:
2951 case AMDGPU::AReg_1024RegClassID:
2952 case AMDGPU::VReg_1024_Align2RegClassID:
2953 case AMDGPU::AReg_1024_Align2RegClassID:
2954 case AMDGPU::AV_1024RegClassID:
2955 case AMDGPU::AV_1024_Align2RegClassID:
2956 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
2957 return 1024;
2958 default:
2960 }
2961}
2962
2966
2969 return true;
2970
2981 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2982}
2983
2986 return true;
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
3007 (Val == 0x3e22f983 && HasInv2Pi);
3008}
3009
3011 if (!HasInv2Pi)
3012 return false;
3014 return true;
3016 return Val == 0x3F00 ||
3017 Val == 0xBF00 ||
3018 Val == 0x3F80 ||
3019 Val == 0xBF80 ||
3020 Val == 0x4000 ||
3021 Val == 0xC000 ||
3022 Val == 0x4080 ||
3023 Val == 0xC080 ||
3024 Val == 0x3E22;
3025}
3026
3030
3032 if (!HasInv2Pi)
3033 return false;
3035 return true;
3037 return Val == 0x3C00 ||
3038 Val == 0xBC00 ||
3039 Val == 0x3800 ||
3040 Val == 0xB800 ||
3041 Val == 0x4000 ||
3042 Val == 0xC000 ||
3043 Val == 0x4400 ||
3044 Val == 0xC400 ||
3045 Val == 0x3118;
3046}
3047
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3061 return 128 + Signed;
3062
3064 return 192 + std::abs(Signed);
3065
3066 if (IsFloat) {
3067
3069 case 0x3800: return 240;
3070 case 0xB800: return 241;
3071 case 0x3C00: return 242;
3072 case 0xBC00: return 243;
3073 case 0x4000: return 244;
3074 case 0xC000: return 245;
3075 case 0x4400: return 246;
3076 case 0xC400: return 247;
3077 case 0x3118: return 248;
3078 default: break;
3079 }
3080
3081 } else {
3082
3084 case 0x3F000000: return 240;
3085 case 0xBF000000: return 241;
3086 case 0x3F800000: return 242;
3087 case 0xBF800000: return 243;
3088 case 0x40000000: return 244;
3089 case 0xC0000000: return 245;
3090 case 0x40800000: return 246;
3091 case 0xC0800000: return 247;
3092 case 0x3E22F983: return 248;
3093 default: break;
3094 }
3095
3096 }
3097
3098 return {};
3099}
3100
3101
3102
3106
3107
3108
3112 return 128 + Signed;
3113
3115 return 192 + std::abs(Signed);
3116
3117
3119 case 0x3F00: return 240;
3120 case 0xBF00: return 241;
3121 case 0x3F80: return 242;
3122 case 0xBF80: return 243;
3123 case 0x4000: return 244;
3124 case 0xC000: return 245;
3125 case 0x4080: return 246;
3126 case 0xC080: return 247;
3127 case 0x3E22: return 248;
3128 default: break;
3129 }
3130
3131
3132 return std::nullopt;
3133}
3134
3135
3136
3140
3141
3143 switch (OpType) {
3154 return false;
3155 default:
3157 }
3158}
3159
3160
3164
3165
3169
3170
3174
3176 if (IsFP64)
3177 return (Val);
3178
3180}
3181
3183 switch (Type) {
3184 default:
3185 break;
3190 return Imm & 0xffff;
3206 }
3207 return Imm;
3208}
3209
3212
3213
3215 switch (CC) {
3218 return true;
3229
3230
3231 return A->hasAttribute(Attribute::InReg) ||
3232 A->hasAttribute(Attribute::ByVal);
3233 default:
3234
3235 return A->hasAttribute(Attribute::InReg);
3236 }
3237}
3238
3240
3242 switch (CC) {
3245 return true;
3256
3257
3258 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3260 default:
3261 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3262 }
3263}
3264
3268
3270 int64_t EncodedOffset) {
3273
3276}
3277
3279 int64_t EncodedOffset, bool IsBuffer) {
3281 if (IsBuffer && EncodedOffset < 0)
3282 return false;
3283 return isInt<24>(EncodedOffset);
3284 }
3285
3287}
3288
3290 return (ByteOffset & 3) == 0;
3291}
3292
3296 return ByteOffset;
3297
3299 return ByteOffset >> 2;
3300}
3301
3303 int64_t ByteOffset, bool IsBuffer,
3304 bool HasSOffset) {
3305
3306
3307
3309 return std::nullopt;
3310
3311 if (isGFX12Plus(ST))
3312 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3313 : std::nullopt;
3314
3315
3318 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3319 : std::nullopt;
3320 }
3321
3323 return std::nullopt;
3324
3327 ? std::optional<int64_t>(EncodedOffset)
3328 : std::nullopt;
3329}
3330
3332 int64_t ByteOffset) {
3334 return std::nullopt;
3335
3337 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3338 : std::nullopt;
3339}
3340
3343 return 12;
3344
3346 return 24;
3347 return 13;
3348}
3349
namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

}
3371
3373 return lookupSourceOfDivergence(IntrID);
3374}
3375
3377 return lookupAlwaysUniform(IntrID);
3378}
3379
3384 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3385 BitsPerComp, NumComponents, NumFormat)
3387 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3388 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3389}
3390
3394 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3395 : getGfx9BufferFormatInfo(Format);
3396}
3397
3400 const unsigned VGPRClasses[] = {
3401 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3402 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3403 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3404 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3405 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3406 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3407 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3408 AMDGPU::VReg_1024RegClassID};
3409
3410 for (unsigned RCID : VGPRClasses) {
3413 return &RC;
3414 }
3415
3416 return nullptr;
3417}
3418
3420 unsigned Enc = MRI.getEncodingValue(Reg);
3422 return Idx >> 8;
3423}
3424
3427 unsigned Enc = MRI.getEncodingValue(Reg);
3429 if (Idx >= 0x100)
3431
3433 if (!RC)
3435
3436 Idx |= MSBs << 8;
3437 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3438
3439 Idx *= 2;
3441 ++Idx;
3442 }
3443
3445}
3446
3447std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3449 static const AMDGPU::OpName VOPOps[4] = {
3450 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3451 AMDGPU::OpName::vdst};
3452 static const AMDGPU::OpName VDSOps[4] = {
3453 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3454 AMDGPU::OpName::vdst};
3455 static const AMDGPU::OpName FLATOps[4] = {
3456 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3457 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3458 static const AMDGPU::OpName BUFOps[4] = {
3459 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3460 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3461 static const AMDGPU::OpName VIMGOps[4] = {
3462 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3463 AMDGPU::OpName::vdata};
3464
3465
3466
3467
3468 static const AMDGPU::OpName VOPDOpsX[4] = {
3469 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3470 AMDGPU::OpName::vdstX};
3471 static const AMDGPU::OpName VOPDOpsY[4] = {
3472 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3473 AMDGPU::OpName::vdstY};
3474
3475
3476 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3477 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3478 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3479 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3480 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3481 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3482 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3483 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3484 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3485
3486 unsigned TSFlags = Desc.TSFlags;
3487
3488 if (TSFlags &
3491 switch (Desc.getOpcode()) {
3492
3493 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3494 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3495 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3496 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3497 return {};
3498 case AMDGPU::V_FMAMK_F16:
3499 case AMDGPU::V_FMAMK_F16_t16:
3500 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3501 case AMDGPU::V_FMAMK_F16_fake16:
3502 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3503 case AMDGPU::V_FMAMK_F32:
3504 case AMDGPU::V_FMAMK_F32_gfx12:
3505 case AMDGPU::V_FMAMK_F64:
3506 case AMDGPU::V_FMAMK_F64_gfx1250:
3507 return {VOP2MADMKOps, nullptr};
3508 default:
3509 break;
3510 }
3511 return {VOPOps, nullptr};
3512 }
3513
3515 return {VDSOps, nullptr};
3516
3518 return {FLATOps, nullptr};
3519
3521 return {BUFOps, nullptr};
3522
3524 return {VIMGOps, nullptr};
3525
3528 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3529 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3530 }
3531
3533
3535 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3536 " these instructions are not expected on gfx1250");
3537
3538 return {};
3539}
3540
3543
3547 return false;
3548
3549
3552
3553
3556
3557 return false;
3558}
3559
3562 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3563 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3564 if (Idx == -1)
3565 continue;
3566
3570 if (RegClass == AMDGPU::VReg_64RegClassID ||
3571 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3572 return true;
3573 }
3574
3575 return false;
3576}
3577
3579 switch (Opc) {
3580 case AMDGPU::V_MUL_LO_U32_e64:
3581 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3582 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3583 case AMDGPU::V_MUL_HI_U32_e64:
3584 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3585 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3586 case AMDGPU::V_MUL_HI_I32_e64:
3587 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3588 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3589 case AMDGPU::V_MAD_U32_e64:
3590 case AMDGPU::V_MAD_U32_e64_dpp:
3591 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3592 return true;
3593 default:
3594 return false;
3595 }
3596}
3597
3600 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3601 return false;
3602
3604 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3605
3607}
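// --- Editorial sketch, not part of the original source ---
// isDPALU_DPP() combines the FeatureDPALU_DPP check above with opcode and
// operand-width queries (parts of its body are elided in this listing). A
// pass or parser deciding whether a DPP form is legal on the current
// subtarget might gate on it directly; DppOpcode below is assumed.
//
//   if (!AMDGPU::isDPALU_DPP(MII.get(DppOpcode), MII, ST))
//     return false; // DPALU DPP form not supported on this subtarget
// ----------------------------------------------------------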
3608
3609 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3610 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3611 return 64;
3612 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3613 return 128;
3614 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3615 return 320;
3616 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3617 return 512;
3618 return 64;
3619}
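// --- Editorial sketch, not part of the original source ---
// getLdsDwGranularity() reports the LDS allocation granule in dwords.
// Assuming the usual dword-to-byte conversion, rounding an LDS byte size up
// to the hardware granule could look like this (LdsBytes is assumed):
//
//   unsigned GranuleBytes = AMDGPU::getLdsDwGranularity(ST) * 4;
//   uint64_t AllocBytes = llvm::alignTo(LdsBytes, GranuleBytes);
// ----------------------------------------------------------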
3620
3621 bool isPackedFP32Inst(unsigned Opc) {
3622 switch (Opc) {
3623 case AMDGPU::V_PK_ADD_F32:
3624 case AMDGPU::V_PK_ADD_F32_gfx12:
3625 case AMDGPU::V_PK_MUL_F32:
3626 case AMDGPU::V_PK_MUL_F32_gfx12:
3627 case AMDGPU::V_PK_FMA_F32:
3628 case AMDGPU::V_PK_FMA_F32_gfx12:
3629 return true;
3630 default:
3631 return false;
3632 }
3633}
3634
3639
3643
3646 return "";
3648 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3649 return Buffer.c_str();
3650 }
3652 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3653 << EncoVariableDims;
3654 return Buffer.c_str();
3655 }
3657 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3658 return Buffer.c_str();
3659 }
3660 }
3662}
3663
3665 std::optional<SmallVector<unsigned>> Attr =
3668
3669 if (!Attr.has_value())
3671 else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; }))
3673 else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; }))
3675
3678 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3679
3680 return A;
3681}
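// --- Editorial sketch, not part of the original source ---
// Reading the cluster-dims attribute back from a function with only the
// accessors declared on ClusterDimsAttr; the "no cluster" and "variable
// dims" kinds are represented by the sentinel encodings handled above.
//
//   AMDGPU::ClusterDimsAttr Attr = AMDGPU::ClusterDimsAttr::get(F);
//   errs() << "cluster dims: " << Attr.to_string() << '\n';
//   const std::array<unsigned, 3> &Dims = Attr.getDims(); // fixed dims, when present
// ----------------------------------------------------------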
3682
3683}
3684
3687 switch (S) {
3689 OS << "Unsupported";
3690 break;
3692 OS << "Any";
3693 break;
3695 OS << "Off";
3696 break;
3698 OS << "On";
3699 break;
3700 }
3701 return OS;
3702}
3703
3704}