LLVM: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

49

50using namespace llvm;

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

78}

79

82 std::unique_ptr &&Streamer) {

84}

85

91}

92

94 std::unique_ptr Streamer)

97}

98

100 return "AMDGPU Assembly Printer";

101}

102

105}

106

109 return nullptr;

111}

112

115}

116

117void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {

119

120

121

123 initializeTargetID(M);

124

127 return;

128

130

133 CodeObjectVersion);

134 HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());

135 }

136

139}

140

142

144 initTargetStreamer(M);

145

148

149

150

152 HSAMetadataStream->end();

156 }

157}

158

163

164

165 if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {

167 STM.getCPU() + " is only available on code object version 6 or better",

168 false);

169 }

170

171

172

174 initializeTargetID(*F.getParent());

175

176 const auto &FunctionTargetID = STM.getTargetID();

177

178

179 if (FunctionTargetID.isXnackSupported() &&

180 FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&

181 FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {

183 "' function does not match module xnack setting");

184 return;

185 }

186

187

188 if (FunctionTargetID.isSramEccSupported() &&

189 FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&

192 "' function does not match module sramecc setting");

193 return;

194 }

195

197 return;

198

199 if (STM.isMesaKernel(F) &&

203 getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);

206 }

207

208 if (STM.isAmdHsaOS())

209 HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);

210}

211

215 return;

216

218 return;

219

221 auto &Context = Streamer.getContext();

224

225 Streamer.pushSection();

226 Streamer.switchSection(&ReadOnlySection);

227

228

229

230 Streamer.emitValueToAlignment(Align(64), 0, 1, 0);

232

234

238 STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),

245 Context),

246 CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);

247

248 Streamer.popSection();

249}

250

252 Register RegNo = MI->getOperand(0).getReg();

253

256 OS << "implicit-def: "

258

260 OS << " : SGPR spill to VGPR lane";

261

264}

265

269 return;

270 }

271

279 }

280 if (DumpCodeInstEmitter) {

281

285 }

286

288}

289

292

298 }

300}

301

307 ": unsupported initializer for address space");

308 return;

309 }

310

311

314 return;

315

317

321 "' is already defined");

322

326

330 TS->emitAMDGPULDS(GVSym, Size, Alignment);

331 return;

332 }

333

335}

336

339

341 switch (CodeObjectVersion) {

343 HSAMetadataStream = std::make_uniqueHSAMD::MetadataStreamerMsgPackV4();

344 break;

346 HSAMetadataStream = std::make_uniqueHSAMD::MetadataStreamerMsgPackV5();

347 break;

349 HSAMetadataStream = std::make_uniqueHSAMD::MetadataStreamerMsgPackV6();

350 break;

351 default:

353 }

354 }

355

357}

358

359void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {

361 return;

362

366

367 auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {

368 int64_t Val;

369 if (Value->evaluateAsAbsolute(Val)) {

370 Res = Val;

371 return true;

372 }

373 return false;

374 };

375

376 const uint64_t MaxScratchPerWorkitem =

378 MCSymbol *ScratchSizeSymbol =

381 if (ScratchSizeSymbol->isVariable() &&

382 TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&

383 ScratchSize > MaxScratchPerWorkitem) {

386 F.getContext().diagnose(DiagStackSize);

387 }

388

389

390

398 TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&

399 NumSgpr > MaxAddressableNumSGPRs) {

401 NumSgpr, MaxAddressableNumSGPRs,

403 F.getContext().diagnose(Diag);

404 return;

405 }

406 }

407

412 uint64_t VCCUsed, FlatUsed, NumSgpr;

413

416 TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&

417 TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&

418 TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {

419

420

421

423 &STM, VCCUsed, FlatUsed,

428 if (NumSgpr > MaxAddressableNumSGPRs) {

430 MaxAddressableNumSGPRs, DS_Error,

432 F.getContext().diagnose(Diag);

433 return;

434 }

435 }

436

442

444 getAnalysis().getMMI();

447 TryGetMCExprValue(NumVgprSymbol->getVariableValue(), NumVgpr) &&

448 TryGetMCExprValue(NumAgprSymbol->getVariableValue(), NumAgpr)) {

453 uint64_t NumVGPRsForWavesPerEU = std::max(

455 uint64_t NumSGPRsForWavesPerEU = std::max(

463

465 F, "amdgpu-waves-per-eu", {0, 0}, true);

466

467 if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {

469 F, F.getSubprogram(),

470 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "

471 "'" +

472 F.getName() + "': desired occupancy was " + Twine(MinWEU) +

473 ", final occupancy is " + Twine(Occupancy));

474 F.getContext().diagnose(Diag);

475 return;

476 }

477 }

478 }

479}

480

482

483

484

491 }

492

493

494

496

497

501 OutStreamer->switchSection(MaxGPRSection);

506

507 for (Function &F : M.functions())

508 validateMCResourceInfo(F);

509

511

513}

514

519 auto &Context = Streamer.getContext();

522 return Str;

523}

524

525

526void AMDGPUAsmPrinter::emitCommonFunctionComments(

527 const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,

530 OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);

531 OutStreamer->emitRawComment(" TotalNumSgprs: " + getMCExprStr(NumSGPR),

532 false);

533 OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false);

534 if (NumAGPR && TotalNumVGPR) {

535 OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false);

536 OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),

537 false);

538 }

539 OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize),

540 false);

542 false);

543}

544

545const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(

549 uint16_t KernelCodeProperties = 0;

551

553 KernelCodeProperties |=

554 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;

555 }

557 KernelCodeProperties |=

558 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

559 }

561 KernelCodeProperties |=

562 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;

563 }

565 KernelCodeProperties |=

566 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;

567 }

569 KernelCodeProperties |=

570 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;

571 }

573 KernelCodeProperties |=

574 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;

575 }

577 KernelCodeProperties |=

578 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;

579 }

581 KernelCodeProperties |=

582 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;

583 }

584

585

586

587

588

589 const MCExpr *KernelCodePropExpr =

592 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);

594 OrValue, Ctx);

596

597 return KernelCodePropExpr;

598}

599

601AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,

607

609

613

614 Align MaxKernArgAlign;

617

621

622 int64_t PGRM_Rsrc3 = 1;

623 bool EvaluatableRsrc3 =

625 (void)PGRM_Rsrc3;

626 (void)EvaluatableRsrc3;

628 static_cast<uint64_t>(PGRM_Rsrc3) == 0);

630

633 Ctx);

634

635 return KernelDescriptor;

636}

637

639

640

643

644 ResourceUsage = &getAnalysis();

645 CurrentProgramInfo.reset(MF);

646

649

650

651

653

655

658

662 OutStreamer->switchSection(ConfigSection);

663 }

664

668

670 getSIProgramInfo(CurrentProgramInfo, MF);

671 }

672

675 EmitPALMetadata(MF, CurrentProgramInfo);

677 emitPALFunctionMetadata(MF);

679 EmitProgramInfoSI(MF, CurrentProgramInfo);

680 }

681

682 DumpCodeInstEmitter = nullptr;

684

685

687 if (Assembler)

688 DumpCodeInstEmitter = Assembler->getEmitterPtr();

689 }

690

694

696

699

700 {

717 }

718

722 OutStreamer->switchSection(CommentSection);

723

726 OutStreamer->emitRawComment(" Function info:", false);

727

728 emitCommonFunctionComments(

734 : nullptr,

739 Ctx),

743 getFunctionCodeSize(MF), MFI);

744 return false;

745 }

746

747 OutStreamer->emitRawComment(" Kernel info:", false);

748 emitCommonFunctionComments(

751 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,

752 CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI);

753

755 " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);

757 " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);

759 " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +

760 " bytes/workgroup (compile time only)", false);

761

763 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false);

764

766 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false);

767

769 " NumSGPRsForWavesPerEU: " +

771 false);

773 " NumVGPRsForWavesPerEU: " +

775 false);

776

783 " AccumOffset: " + getMCExprStr(AdjustedAccum), false);

784 }

785

787 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);

788

791

793 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +

795 false);

796 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +

798 false);

799 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +

801 false);

802 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +

804 false);

805 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +

807 false);

808 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +

810 false);

811 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +

813 false);

814

815 [[maybe_unused]] int64_t PGMRSrc3;

818 PGMRSrc3) &&

819 static_cast<uint64_t>(PGMRSrc3) == 0));

822 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +

825 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,

826 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),

827 false);

829 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +

832 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,

833 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),

834 false);

835 }

836 }

837

838 if (DumpCodeInstEmitter) {

839

842

843 for (size_t i = 0; i < DisasmLines.size(); ++i) {

844 std::string Comment = "\n";

847 Comment += " ; " + HexLines[i] + "\n";

848 }

849

852 }

853 }

854

855 return false;

856}

857

858

859void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {

860

861

864

865

866 if (M.empty())

867 return;

868

869

870

871 for (auto &F : M) {

873 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&

874 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))

875 break;

876

879 if (TSTargetID->isXnackSupported())

880 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)

881 TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());

882 if (TSTargetID->isSramEccSupported())

883 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)

884 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());

885 }

886}

887

891

893

896

897

898

899 if (MI.isDebugInstr())

900 continue;

901

902 CodeSize += TII->getInstSizeInBytes(MI);

903 }

904 }

905

906 return CodeSize;

907}

908

909

910

914

915

916 const MCExpr *MaximumTaken =

918

919

922 Ctx);

923

925}

926

927void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

931

932 auto CreateExpr = [&Ctx](int64_t Value) {

934 };

935

936 auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {

937 int64_t Val;

938 if (Value->evaluateAsAbsolute(Val)) {

939 Res = Val;

940 return true;

941 }

942 return false;

943 };

944

945 auto GetSymRefExpr =

949 };

950

952 ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);

953 ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);

956

959 ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);

960 ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);

961 ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);

962 ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);

965 GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);

966

968

969

970

971

975

976

981 if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&

982 NumSgpr > MaxAddressableNumSGPRs) {

983

986 MF.getFunction(), "addressable scalar registers", NumSgpr,

989 ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);

990 }

991 }

992

993

995

997

998

999

1000 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;

1001

1002 if (isShader(F.getCallingConv())) {

1003 bool IsPixelShader =

1005

1006

1009 unsigned LastEna = 0;

1010

1011 if (IsPixelShader) {

1012

1013

1014

1015

1016

1017

1020

1021

1022 assert((InputEna || InputAddr) &&

1023 "PSInputAddr and PSInputEnable should "

1024 "never both be 0 for AMDGPU_PS shaders");

1025

1026

1027

1028 LastEna = InputEna ? llvm::Log2_32(InputEna) + 1 : 1;

1029 }

1030

1031

1032

1034 unsigned PSArgCount = 0;

1035 unsigned IntermediateVGPR = 0;

1036 for (auto &Arg : F.args()) {

1037 unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;

1038 if (Arg.hasAttribute(Attribute::InReg)) {

1039 WaveDispatchNumSGPR += NumRegs;

1040 } else {

1041

1042

1043

1044

1045

1046 if (IsPixelShader && PSArgCount < 16) {

1047 if ((1 << PSArgCount) & InputAddr) {

1048 if (PSArgCount < LastEna)

1049 WaveDispatchNumVGPR += NumRegs;

1050 else

1051 IntermediateVGPR += NumRegs;

1052 }

1053 PSArgCount++;

1054 } else {

1055

1056

1057 if (IntermediateVGPR) {

1058 WaveDispatchNumVGPR += IntermediateVGPR;

1059 IntermediateVGPR = 0;

1060 }

1061 WaveDispatchNumVGPR += NumRegs;

1062 }

1063 }

1064 }

1066 {ProgInfo.NumSGPR, CreateExpr(WaveDispatchNumSGPR)}, Ctx);

1067

1069 {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);

1070

1073 } else if (isKernel(F.getCallingConv()) &&

1075

1076

1077

1082 }

1083

1084

1085

1090 Ctx);

1094 Ctx);

1095

1100 if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&

1101 NumSgpr > MaxAddressableNumSGPRs) {

1102

1103

1106 NumSgpr, MaxAddressableNumSGPRs,

1109 ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);

1111 }

1112 }

1113

1119 }

1120

1127 }

1128

1136 }

1137

1138

1139 auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,

1140 unsigned Granule) {

1141 const MCExpr *OneConst = CreateExpr(1ul);

1142 const MCExpr *GranuleConst = CreateExpr(Granule);

1144 const MCExpr *AlignToGPR =

1146 const MCExpr *DivGPR =

1149 return SubGPR;

1150 };

1151

1156

1158

1159

1160

1162

1164

1165

1167

1168 unsigned LDSAlignShift;

1169 if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) {

1170

1171 LDSAlignShift = 11;

1172 } else if (STM.getFeatureBits().test(

1173 FeatureAddressableLocalMemorySize65536)) {

1174

1175 LDSAlignShift = 9;

1176 } else {

1177

1178 LDSAlignShift = 8;

1179 }

1180

1183

1186 alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;

1187

1188

1189 auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {

1190 const MCExpr *Ceil =

1193 };

1194

1195

1196 unsigned ScratchAlignShift =

1198

1199

1200

1204 CreateExpr(1ULL << ScratchAlignShift));

1205

1209 }

1210

1211

1212 unsigned TIDIGCompCnt = 0;

1214 TIDIGCompCnt = 2;

1216 TIDIGCompCnt = 1;

1217

1218

1219

1220

1221

1222

1227

1229

1238

1241

1243

1251 return Dst;

1252 };

1253

1256 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,

1257 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);

1260 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,

1261 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);

1262 }

1263

1267

1268 const auto [MinWEU, MaxWEU] =

1271 if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {

1273 F, F.getSubprogram(),

1274 "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "

1275 "'" +

1276 F.getName() + "': desired occupancy was " + Twine(MinWEU) +

1277 ", final occupancy is " + Twine(Occupancy));

1278 F.getContext().diagnose(Diag);

1279 }

1280}

1281

1283 switch (CallConv) {

1284 default: [[fallthrough]];

1292 }

1293}

1294

1295void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,

1301

1302

1307 shft, Ctx);

1308 };

1309

1310 auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {

1311 int64_t Val;

1312 if (Value->evaluateAsAbsolute(Val))

1314 else

1316 };

1317

1320

1322 4);

1323

1325 EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(Ctx), 4);

1326

1328

1329

1330

1332 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1333 0x3FFFF, 12),

1334 4);

1336 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1337 0x7FFF, 12),

1338 4);

1339 } else {

1340 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1341 0x1FFF, 12),

1342 4);

1343 }

1344

1345

1346

1347 } else {

1349

1351 SetBits(CurrentProgramInfo.VGPRBlocks, 0x3F, 0),

1352 SetBits(CurrentProgramInfo.SGPRBlocks, 0x0F, 6),

1354 EmitResolvedOrExpr(GPRBlocks, 4);

1356

1357

1358

1360 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1361 0x3FFFF, 12),

1362 4);

1364 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1365 0x7FFF, 12),

1366 4);

1367 } else {

1368 EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

1369 0x1FFF, 12),

1370 4);

1371 }

1372 }

1373

1378 : CurrentProgramInfo.LDSBlocks;

1384 }

1385

1390}

1391

1392

1396 if (ST.hasIEEEMode())

1398

1401

1406 }

1407

1409 (unsigned)(CurrentProgramInfo.LdsSize *

1411}

1412

1413

1414

1415

1416

1417

1418void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,

1424

1427

1428

1431 MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);

1432 }

1433

1435 if (MD->getPALMajorVersion() < 3) {

1436 MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);

1439 } else {

1440 const MCExpr *HasScratchBlocks =

1444 MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);

1445 }

1446 } else {

1447 MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);

1451 }

1452

1453

1454 MD->setScratchSize(

1458 Ctx);

1459

1463 : CurrentProgramInfo.LDSBlocks;

1464 if (MD->getPALMajorVersion() < 3) {

1465 MD->setRsrc2(

1468 Ctx);

1471 } else {

1472

1473 const unsigned ExtraLdsDwGranularity =

1475 MD->setGraphicsRegisters(

1476 ".ps_extra_lds_size",

1477 (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));

1478

1479

1481 ".persp_sample_ena", ".persp_center_ena",

1482 ".persp_centroid_ena", ".persp_pull_model_ena",

1483 ".linear_sample_ena", ".linear_center_ena",

1484 ".linear_centroid_ena", ".line_stipple_tex_ena",

1485 ".pos_x_float_ena", ".pos_y_float_ena",

1486 ".pos_z_float_ena", ".pos_w_float_ena",

1487 ".front_face_ena", ".ancillary_ena",

1488 ".sample_coverage_ena", ".pos_fixed_pt_ena"};

1492 MD->setGraphicsRegisters(".spi_ps_input_ena", Field,

1493 (bool)((PSInputEna >> Idx) & 1));

1494 MD->setGraphicsRegisters(".spi_ps_input_addr", Field,

1495 (bool)((PSInputAddr >> Idx) & 1));

1496 }

1497 }

1498 }

1499

1500

1501 if (MD->getPALMajorVersion() < 3 && STM.isWave32())

1503}

1504

1505void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {

1509 MD->setFunctionScratchSize(FnName, MFI.getStackSize());

1512

1513 if (MD->getPALMajorVersion() < 3) {

1514

1515 MD->setRsrc1(

1520 } else {

1522 }

1523

1524

1525 MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);

1528}

1529

1530

1532 switch (Size) {

1533 case 4:

1535 case 8:

1537 case 16:

1539 default:

1541 }

1542}

1543

1550

1554

1555 Out.initDefault(&STM, Ctx, false);

1556

1562

1564

1567

1571 }

1572

1575

1578

1581

1584

1587

1590

1591 if (STM.isXNACKEnabled())

1593

1594 Align MaxKernArgAlign;

1600

1601

1602

1603

1605}

1606

1608 const char *ExtraCode, raw_ostream &O) {

1609

1611 return false;

1612

1613 if (ExtraCode && ExtraCode[0]) {

1614 if (ExtraCode[1] != 0)

1615 return true;

1616

1617 switch (ExtraCode[0]) {

1618 case 'r':

1619 break;

1620 default:

1621 return true;

1622 }

1623 }

1624

1625

1627 if (MO.isReg()) {

1630 return false;

1631 }

1632 if (MO.isImm()) {

1633 int64_t Val = MO.getImm();

1635 O << Val;

1636 } else if (isUInt<16>(Val)) {

1637 O << format("0x%" PRIx16, static_cast<uint16_t>(Val));

1638 } else if (isUInt<32>(Val)) {

1639 O << format("0x%" PRIx32, static_cast<uint32_t>(Val));

1640 } else {

1641 O << format("0x%" PRIx64, static_cast<uint64_t>(Val));

1642 }

1643 return false;

1644 }

1645 return true;

1646}

1647

1654}

1655

/// Emits the per-function "kernel-resource-usage" remarks: one remark each for
/// the function name, SGPR/VGPR (and AGPR) counts, scratch size, dynamic-stack
/// flag, occupancy and SGPR/VGPR spill counts, plus the LDS size when
/// \p isModuleEntryFunction is set. Every entry except "FunctionName" has its
/// label prefixed with Indent.
/// NOTE(review): this listing is missing several lines of the function (part
/// of the signature, two early-exit conditions, the remark emission itself);
/// the comments here describe only what the visible lines establish.
1656void AMDGPUAsmPrinter::emitResourceUsageRemarks(

1658 bool isModuleEntryFunction, bool hasMAIInsts) {

// Bail out when no optimization remark emitter is available. The guard was
// previously inverted (`if (ORE)`), which returned whenever an emitter existed
// and fell through with a null ORE otherwise.
1659 if (!ORE)

1660 return;

1661

1662 const char *Name = "kernel-resource-usage";

1663 const char *Indent = " ";

1664

1665

1668 return;

1669

1670

1672 return;

1673

1674 auto EmitResourceUsageRemark = [&](StringRef RemarkName,

1676

1677

1678

1679 std::string LabelStr = RemarkLabel.str() + ": ";

1680 if (RemarkName != "FunctionName")

1681 LabelStr = Indent + LabelStr;

1682

1688 });

1689 };

1690

1691

1692

1693

1694

1695

1696 EmitResourceUsageRemark("FunctionName", "Function Name",

1698 EmitResourceUsageRemark("NumSGPR", "TotalSGPRs",

1699 getMCExprStr(CurrentProgramInfo.NumSGPR));

1700 EmitResourceUsageRemark("NumVGPR", "VGPRs",

1701 getMCExprStr(CurrentProgramInfo.NumArchVGPR));

1703 EmitResourceUsageRemark("NumAGPR", "AGPRs",

1704 getMCExprStr(CurrentProgramInfo.NumAccVGPR));

1705 }

1706 EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",

1707 getMCExprStr(CurrentProgramInfo.ScratchSize));

1708 int64_t DynStack;

1709 bool DynStackEvaluatable =

1710 CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);

1712 DynStackEvaluatable && DynStack ? "True" : "False";

1713 EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);

1714 EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",

1715 getMCExprStr(CurrentProgramInfo.Occupancy));

1716 EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

1718 EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",

1720 if (isModuleEntryFunction)

1721 EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",

1722 CurrentProgramInfo.LDSSize);

1723}

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()

static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST)

static unsigned getRsrcReg(CallingConv::ID CallConv)

static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)

static uint32_t getFPMode(SIModeRegisterDefaults Mode)

static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)

static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)

AMDGPU Assembly printer class.

AMDHSA kernel descriptor MCExpr struct for use in MC layer.

MC infrastructure to propagate the function level resource usage info.

Analyzes how many registers and other resources are used by functions.

AMDHSA kernel descriptor definitions.

MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.

amd_element_byte_size_t

The values used to define the number of bytes to use for the swizzle element size.

#define AMD_HSA_BITS_SET(dst, mask, val)

@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID

@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE

@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR

@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR

@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE

@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER

@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR

@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED

@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT

@ AMD_CODE_PROPERTY_IS_PTR64

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Analysis containing CSE Info

#define LLVM_EXTERNAL_VISIBILITY

Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.

AMD GCN specific subclass of TargetSubtarget.

const HexagonInstrInfo * TII

R600 Assembly printer class.

#define R_00B028_SPI_SHADER_PGM_RSRC1_PS

#define R_0286E8_SPI_TMPRING_SIZE

#define FP_ROUND_MODE_DP(x)

#define C_00B84C_SCRATCH_EN

#define FP_ROUND_ROUND_TO_NEAREST

#define R_0286D0_SPI_PS_INPUT_ADDR

#define R_00B860_COMPUTE_TMPRING_SIZE

#define R_00B428_SPI_SHADER_PGM_RSRC1_HS

#define R_00B328_SPI_SHADER_PGM_RSRC1_ES

#define R_00B528_SPI_SHADER_PGM_RSRC1_LS

#define R_0286CC_SPI_PS_INPUT_ENA

#define R_00B128_SPI_SHADER_PGM_RSRC1_VS

#define FP_DENORM_MODE_DP(x)

#define R_00B848_COMPUTE_PGM_RSRC1

#define FP_ROUND_MODE_SP(x)

#define FP_DENORM_MODE_SP(x)

#define R_00B228_SPI_SHADER_PGM_RSRC1_GS

#define S_00B02C_EXTRA_LDS_SIZE(x)

#define R_00B84C_COMPUTE_PGM_RSRC2

#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

void emitFunctionEntryLabel() override

EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.

const MCSubtargetInfo * getGlobalSTI() const

void emitImplicitDef(const MachineInstr *MI) const override

Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.

std::vector< std::string > DisasmLines

void emitStartOfAsmFile(Module &M) override

This virtual method can be overridden by targets that want to emit something at the start of their file.

StringRef getPassName() const override

getPassName - Return a nice clean name for a pass.

std::vector< std::string > HexLines

bool IsTargetStreamerInitialized

void emitGlobalVariable(const GlobalVariable *GV) override

Emit the specified global variable to the .s file.

void getAnalysisUsage(AnalysisUsage &AU) const override

Record analysis usage.

bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override

Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.

bool runOnMachineFunction(MachineFunction &MF) override

Emit the specified function out to the OutStreamer.

void emitFunctionBodyEnd() override

Targets can override this to emit stuff after the last basic block in the function.

bool doFinalization(Module &M) override

Shut down the asmprinter.

void emitEndOfAsmFile(Module &M) override

This virtual method can be overridden by targets that want to emit something at the end of their file.

AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)

bool doInitialization(Module &M) override

Set up the AsmPrinter when we are working on a new module.

void emitFunctionBodyStart() override

Targets can override this to emit stuff before the first basic block in the function.

void emitBasicBlockStart(const MachineBasicBlock &MBB) override

Targets can override this to emit stuff at the start of a basic block.

AMDGPUTargetStreamer * getTargetStreamer() const

static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)

static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)

static const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, const GCNSubtarget &STM, MCContext &Ctx)

Mimics GCNSubtarget::computeOccupancy for MCExpr.

static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)

static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)

Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but needed for further MCExprs).

static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)

uint32_t getLDSSize() const

bool isMemoryBound() const

bool needsWaveLimiter() const

bool isEntryFunction() const

bool isModuleEntryFunction() const

unsigned getAddressableLocalMemorySize() const

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const

unsigned getWavefrontSize() const

virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)

AMDGPUPALMetadata * getPALMetadata()

virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)

virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)

virtual bool EmitISAVersion()

void initializeTargetID(const MCSubtargetInfo &STI)

virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)

virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)

virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR)

virtual void EmitDirectiveAMDGCNTarget()

virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)

const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const

void setXnackSetting(TargetIDSetting NewXnackSetting)

Sets xnack setting to NewXnackSetting.

bool isXnackOnOrAny() const

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

This class represents an incoming formal argument to a Function.

This class is intended to be used as a driving class for all asm writers.

const TargetLoweringObjectFile & getObjFileLowering() const

Return information about object file lowering.

MCSymbol * getSymbol(const GlobalValue *GV) const

virtual void emitGlobalVariable(const GlobalVariable *GV)

Emit the specified global variable to the .s file.

TargetMachine & TM

Target machine description.

const MCAsmInfo * MAI

Target Asm Printer information.

MachineFunction * MF

The current machine function.

virtual void SetupMachineFunction(MachineFunction &MF)

This should be called when a new MachineFunction is being processed from runOnMachineFunction.

void emitFunctionBody()

This method emits the body and trailer for a function.

virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const

Return true if the basic block has exactly one predecessor and the control transfer mechanism between the predecessor and this block is a fall-through.

bool doInitialization(Module &M) override

Set up the AsmPrinter when we are working on a new module.

virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const

This emits linkage information about GVSym based on GV, if this is supported by the target.

void getAnalysisUsage(AnalysisUsage &AU) const override

Record analysis usage.

unsigned getFunctionNumber() const

Return a unique ID for the current function.

MachineOptimizationRemarkEmitter * ORE

Optimization remark emitter.

MCSymbol * CurrentFnSym

The symbol for the current function.

MachineModuleInfo * MMI

This is a pointer to the current MachineModuleInfo.

MCContext & OutContext

This is the context for the output file that we are streaming.

bool doFinalization(Module &M) override

Shut down the asmprinter.

virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)

Targets can override this to emit stuff at the start of a basic block.

void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const

This emits visibility information about symbol, if this is supported by the target.

std::unique_ptr< MCStreamer > OutStreamer

This is the MCStreamer object for the file we are generating.

bool isVerbose() const

Return true if assembly output should contain comments.

void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const

virtual void emitFunctionEntryLabel()

emitFunctionEntryLabel - Emit the label that is the entrypoint for the function.

virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)

Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.

A parsed version of the target data layout string, along with methods for querying it.

Diagnostic information for optimization failures.

Diagnostic information for stack size etc.

DISubprogram * getSubprogram() const

Get the attached subprogram.

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

unsigned getMinNumSGPRs(unsigned WavesPerEU) const

bool hasGFX90AInsts() const

unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const

Return occupancy for the given function.

const SIInstrInfo * getInstrInfo() const override

bool hasSGPRInitBug() const

bool isTgSplitEnabled() const

unsigned getMinNumVGPRs(unsigned WavesPerEU) const

bool isCuModeEnabled() const

const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const

bool isTrapHandlerEnabled() const

unsigned getMaxNumUserSGPRs() const

Generation getGeneration() const

unsigned getAddressableNumSGPRs() const

unsigned getMaxWaveScratchSize() const

bool hasKernargSegmentPtr() const

bool hasDispatchID() const

bool hasPrivateSegmentBuffer() const

bool hasPrivateSegmentSize() const

bool hasDispatchPtr() const

bool hasFlatScratchInit() const

MaybeAlign getAlign() const

Returns the alignment of the given variable or function.

VisibilityTypes getVisibility() const

bool isDeclaration() const

Return true if the primary definition of this global value is outside of the current translation unit...

unsigned getAddressSpace() const

Module * getParent()

Get the module that this global value is contained inside of...

const DataLayout & getDataLayout() const

Get the data layout of the module this global belongs to.

Type * getValueType() const

const Constant * getInitializer() const

getInitializer - Return the initializer for this global variable.

bool hasInitializer() const

Definitions have initializers, declarations don't.

This is an important class for using LLVM in a threaded context.

void diagnose(const DiagnosticInfo &DI)

Report a message to the currently installed diagnostic handler.

const DiagnosticHandler * getDiagHandlerPtr() const

getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.

MCCodeEmitter * getEmitterPtr() const

static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)

static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)

Context object for machine code objects.

const MCObjectFileInfo * getObjectFileInfo() const

MCSectionELF * getELFSection(const Twine &Section, unsigned Type, unsigned Flags)

void reportError(SMLoc L, const Twine &Msg)

Base class for the full range of assembler expressions which are needed for parsing.

MCSection * getReadOnlySection() const

MCContext & getContext() const

void gatherResourceInfo(const MachineFunction &MF, const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &FRI, MCContext &OutContext)

AMDGPUResourceUsageAnalysis gathers resource usage on a per-function granularity.

MCSymbol * getMaxSGPRSymbol(MCContext &OutContext)

MCSymbol * getMaxAGPRSymbol(MCContext &OutContext)

const MCExpr * createTotalNumVGPRs(const MachineFunction &MF, MCContext &Ctx)

void finalize(MCContext &OutContext)

MCSymbol * getMaxVGPRSymbol(MCContext &OutContext)

const MCExpr * createTotalNumSGPRs(const MachineFunction &MF, bool hasXnack, MCContext &Ctx)

MCSymbol * getSymbol(StringRef FuncName, ResourceInfoKind RIK, MCContext &OutContext)

This represents a section on linux, lots of unix variants and some bare metal systems.

void ensureMinAlignment(Align MinAlignment)

Makes sure that Alignment is at least MinAlignment.

MCContext & getContext() const

Generic base class for all target subtargets.

const Triple & getTargetTriple() const

static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

const MCExpr * getVariableValue(bool SetUsed=true) const

getVariableValue - Get the value for variable symbols.

bool isDefined() const

isDefined - Check if this symbol is defined (i.e., it has an address).

StringRef getName() const

getName - Get the symbol name.

bool isVariable() const

isVariable - Check if this is a variable symbol.

void redefineIfPossible()

Prepare this symbol to be redefined.

MCStreamer & getStreamer()

static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

uint64_t getStackSize() const

Return the number of bytes that must be allocated to hold all of the fixed size frame objects.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

void setAlignment(Align A)

setAlignment - Set the alignment of the function.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MCContext & getContext() const

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineBasicBlock & front() const

Representation of each machine instruction.

This class contains meta information specific to a module.

MachineFunction * getMachineFunction(const Function &F) const

Returns the MachineFunction associated with IR function F if there is one, otherwise nullptr.

MachineOperand class - Representation of each machine instruction operand.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

bool isImm() const

isImm - Tests if this is a MO_Immediate operand.

Register getReg() const

getReg - Returns the register number.

A Module instance is used to store all the information related to an LLVM module.

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

unsigned getNumSpilledVGPRs() const

unsigned getNumSpilledSGPRs() const

GCNUserSGPRUsageInfo & getUserSGPRInfo()

unsigned getMaxWavesPerEU() const

bool hasWorkGroupIDZ() const

bool hasWorkGroupIDY() const

SIModeRegisterDefaults getMode() const

bool hasWorkGroupInfo() const

bool hasWorkItemIDY() const

bool hasWorkGroupIDX() const

unsigned getNumKernargPreloadedSGPRs() const

unsigned getNumUserSGPRs() const

unsigned getPSInputAddr() const

bool hasWorkItemIDZ() const

unsigned getPSInputEnable() const

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

StringRef - Represent a constant reference to a string, i.e.

std::string str() const

str - Get the contents as an std::string.

Primary interface to the complete machine description for the target machine.

const Triple & getTargetTriple() const

const MCSubtargetInfo * getMCSubtargetInfo() const

const STC & getSubtarget(const Function &F) const

This method returns a pointer to the specified type of TargetSubtargetInfo.

MCSymbol * getSymbol(const GlobalValue *GV) const

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

OSType getOS() const

Get the parsed operating system type of this triple.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

LLVM Value Representation.

StringRef getName() const

Return a constant reference to the value's name.

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to a SmallVector or SmallString.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ LOCAL_ADDRESS

Address space for local memory.

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)

@ FIXED_NUM_SGPRS_FOR_INIT_BUG

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)

void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)

bool isEntryFunctionCC(CallingConv::ID CC)

LLVM_READNONE bool isKernel(CallingConv::ID CC)

IsaVersion getIsaVersion(StringRef GPU)

bool isCompute(CallingConv::ID cc)

const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)

Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...

unsigned getAMDHSACodeObjectVersion(const Module &M)

bool isGFX90A(const MCSubtargetInfo &STI)

bool hasMAIInsts(const MCSubtargetInfo &STI)

LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)

Is this literal inlinable, and not one of the values intended for floating point values.

bool isShader(CallingConv::ID cc)

const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)

bool isGFX10Plus(const MCSubtargetInfo &STI)

constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)

Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...

unsigned hasKernargPreload(const MCSubtargetInfo &STI)

bool isModuleEntryFunctionCC(CallingConv::ID CC)

std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ SPIR_KERNEL

Used for SPIR kernel functions.

@ AMDGPU_ES

Used for AMDPAL shader stage before geometry shader if geometry is in use.

@ AMDGPU_LS

Used for AMDPAL vertex shader if tessellation is in use.

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

static StringRef getCPU(StringRef CPU)

Processes a CPU name.

Target & getTheR600Target()

The target for R600 GPUs.

AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

constexpr T divideCeil(U Numerator, V Denominator)

Returns the integer ceil(Numerator / Denominator).

Target & getTheGCNTarget()

The target for GCN GPUs.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

unsigned Log2(Align A)

Returns the log2 of the alignment.

Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

Implement std::hash so that hash_code can be used in STL containers.

const SIFunctionResourceInfo & getResourceInfo() const

uint64_t kernarg_segment_byte_size

const MCExpr * workitem_private_segment_byte_size

const MCExpr * compute_pgm_resource2_registers

uint8_t kernarg_segment_alignment

void validate(const MCSubtargetInfo *STI, MCContext &Ctx)

const MCExpr * wavefront_sgpr_count

void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)

const MCExpr * workitem_vgpr_count

const MCExpr * is_dynamic_callstack

uint32_t workgroup_group_segment_byte_size

const MCExpr * compute_pgm_resource1_registers

const MCExpr * compute_pgm_rsrc2

const MCExpr * kernarg_size

const MCExpr * kernarg_preload

const MCExpr * compute_pgm_rsrc3

const MCExpr * private_segment_fixed_size

static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)

const MCExpr * compute_pgm_rsrc1

const MCExpr * group_segment_fixed_size

const MCExpr * kernel_code_properties

This struct is a compact representation of a valid (non-zero power of two) alignment.

virtual bool isAnalysisRemarkEnabled(StringRef PassName) const

Return true if analysis remarks are enabled, override to provide different implementation.

Track resource usage for kernels / entry functions.

const MCExpr * ComputePGMRSrc3GFX90A

const MCExpr * NumArchVGPR

const MCExpr * getComputePGMRSrc2(MCContext &Ctx) const

Compute the value of the ComputePGMRsrc2 register.

const MCExpr * VGPRBlocks

const MCExpr * ScratchBlocks

const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const

Compute the value of the ComputePGMRsrc1 register.

uint32_t TrapHandlerEnable

const MCExpr * ScratchEnable

const MCExpr * AccumOffset

const MCExpr * NumAccVGPR

const MCExpr * DynamicCallStack

const MCExpr * SGPRBlocks

const MCExpr * NumVGPRsForWavesPerEU

const MCExpr * getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST, MCContext &Ctx) const

const MCExpr * ScratchSize

const MCExpr * NumSGPRsForWavesPerEU

void reset(const MachineFunction &MF)

static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)

RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.