LLVM: lib/Target/AMDGPU/GCNSubtarget.cpp Source File

//===-- GCNSubtarget.cpp - GCN Subtarget Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the GCN specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "GCNSubtarget.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSelectionDAGInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/DiagnosticInfo.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "gcn-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget

static cl::opt<bool> EnableVGPRIndexMode(
    "amdgpu-vgpr-index-mode",
    cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
    cl::init(false));

static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
                           cl::desc("Enable the use of AA during codegen."),
                           cl::init(true));

static cl::opt<unsigned>
    NSAThreshold("amdgpu-nsa-threshold",
                 cl::desc("Number of addresses from which to enable MIMG NSA."),
                 cl::init(2), cl::Hidden);
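// Illustrative usage (sketch, not from the original file): the three options
// above are ordinary cl::opts, so any tool that parses LLVM command-line
// options can set them, e.g.
//   llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-vgpr-index-mode kernel.ll
//   llc -mtriple=amdgcn -mcpu=gfx1030 -amdgpu-nsa-threshold=3 kernel.ll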

GCNSubtarget::~GCNSubtarget() = default;

GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                            StringRef GPU,
                                                            StringRef FS) {
  // Determine default and user-specified characteristics.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.
  //
  // Similarly we want enable-prt-strict-null to be on by default and not to
  // unset everything else if it is disabled.
  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by
  // default.
  if (isAmdHsaOS())
    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS.

  // Wave sizes are mutually exclusive: if one was requested explicitly,
  // disable the other wave-size features so the request wins.
  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

  // Implement the "generic" processors, which act as the default when no
  // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
  // the first amdgcn target that supports flat addressing; other OSes default
  // to the first amdgcn target.
  if (Gen == AMDGPUSubtarget::INVALID) {
    Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
                                       : AMDGPUSubtarget::SOUTHERN_ISLANDS;
  } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
             !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    // If there is no default wave size it must be a generation before gfx10:
    // those targets carry FeatureWavefrontSize64 in their definitions already.
    // For newer subtargets without an explicit wave size, default to wave32.
    ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  }

  // Unless +-flat-for-global was specified, turn on FlatForGlobal for targets
  // that do not support ADDR64 variants of MUBUF instructions: such targets
  // cannot use a 64-bit offset with a MUBUF instruction to access the global
  // address space.
  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = true;
  }

  // Unless +-flat-for-global was specified, use MUBUF instructions for global
  // address space access if flat operations are not available.
  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn && AddressableLocalMemorySize == 0)
    AddressableLocalMemorySize = 32768;

  TargetID.setTargetIDFromFeaturesString(FS);

  LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
                    << TargetID.getXnackSetting() << '\n');
  LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                    << TargetID.getSramEccSetting() << '\n');

  return *this;
}
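// Worked example (illustrative): for an amdhsa target built with
// FS = "+wavefrontsize32", FullFS above is assembled as
//   "+promote-alloca,+load-store-opt,+enable-ds128,"
//   "+flat-for-global,+unaligned-access-mode,+trap-handler,"
//   "+enable-prt-strict-null,-wavefrontsize16,-wavefrontsize64,+wavefrontsize32"
// User features are appended last so they take precedence, and the competing
// wave-size bits are cleared because wave sizes are mutually exclusive.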

void GCNSubtarget::checkSubtargetFeatures(const Function &F) const {
  LLVMContext &Ctx = F.getContext();
  if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
      hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    Ctx.diagnose(DiagnosticInfoUnsupported(
        F, "must specify exactly one of wavefrontsize32 and wavefrontsize64"));
  }
}

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM)
    :
    AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
    AMDGPUSubtarget(TT),
    TargetTriple(TT),
    TargetID(*this),
    InstrItins(getInstrItineraryForCPU(GPU)),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
  EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);

  TSInfo = std::make_unique<AMDGPUSelectionDAGInfo>();

  CallLoweringInfo = std::make_unique<AMDGPUCallLowering>(*getTargetLowering());
  InlineAsmLoweringInfo =
      std::make_unique<InlineAsmLowering>(getTargetLowering());
  Legalizer = std::make_unique<AMDGPULegalizerInfo>(*this, TM);
  RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*this);
  InstSelector =
      std::make_unique<AMDGPUInstructionSelector>(*this, *RegBankInfo, TM);
}

const SelectionDAGTargetInfo *GCNSubtarget::getSelectionDAGInfo() const {
  return TSInfo.get();
}

unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  if (getGeneration() < GFX10)
    return 1;

  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  }

  return 2;
}
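// In short: pre-GFX10 subtargets allow a single constant-bus read per VALU
// instruction, GFX10+ allows two, and the 64-bit shifts listed above are the
// GFX10+ exception that is still limited to one.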

bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:
    // In gfx9, the preferred handling of the unused high 16-bits changed. Most
    // instructions maintain the legacy behavior of zeroing. Some instructions
    // changed to preserving the high bits.
    return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
  default:
    return false;
  }
}
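// Illustrative consequence of the switch above: V_ADD_F16_e32 zeroes the high
// 16 bits of its 32-bit destination on GFX9 and earlier but preserves them on
// GFX10+, while V_MAC_F16_e32 only zeroes them on VOLCANIC_ISLANDS. Passes
// reading the full 32-bit register must not assume the high half is zero
// whenever this predicate returns false.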

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       const SchedRegion &Region) const {
  // Track register pressure so the scheduler can try to decrease pressure
  // once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us less
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

void GCNSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
                                             const SchedRegion &Region) const {
  const Function &F = Region.RegionBegin->getMF()->getFunction();
  Attribute PostRADirectionAttr = F.getFnAttribute("amdgpu-post-ra-direction");
  if (!PostRADirectionAttr.isValid())
    return;

  StringRef PostRADirectionStr = PostRADirectionAttr.getValueAsString();
  if (PostRADirectionStr == "topdown") {
    Policy.OnlyTopDown = true;
    Policy.OnlyBottomUp = false;
  } else if (PostRADirectionStr == "bottomup") {
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = true;
  } else if (PostRADirectionStr == "bidirectional") {
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = false;
  } else {
    DiagnosticInfoOptimizationFailure Diag(
        F, F.getSubprogram(), "invalid value for postRA direction attribute");
    F.getContext().diagnose(Diag);
  }

  LLVM_DEBUG({
    const char *DirStr = "default";
    if (Policy.OnlyTopDown && !Policy.OnlyBottomUp)
      DirStr = "topdown";
    else if (!Policy.OnlyTopDown && Policy.OnlyBottomUp)
      DirStr = "bottomup";
    else if (!Policy.OnlyTopDown && !Policy.OnlyBottomUp)
      DirStr = "bidirectional";

    dbgs() << "Post-MI-sched direction (" << F.getName() << "): " << DirStr
           << '\n';
  });
}
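// Usage sketch (illustrative IR, not from this file): the direction is chosen
// per function through a string attribute, e.g.
//   define amdgpu_kernel void @k() #0 { ... }
//   attributes #0 = { "amdgpu-post-ra-direction"="bottomup" }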

void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
  if (isWave32()) {
    // Fix implicit $vcc operands after MIParser has verified that they match
    // the instruction definitions.
    for (auto &MBB : MF) {
      for (auto &MI : MBB)
        InstrInfo.fixImplicitOperands(MI);
    }
  }
}

bool GCNSubtarget::hasMadF16() const {
  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}

bool GCNSubtarget::useVGPRIndexMode() const {
  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}

bool GCNSubtarget::useAA() const { return UseAA; }

unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
                                                   getGeneration());
}

unsigned
GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs,
                                       unsigned DynamicVGPRBlockSize) const {
  return AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs(this, VGPRs,
                                                       DynamicVGPRBlockSize);
}

unsigned
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

  if (HasFlatScratch || HasArchitectedFlatScratch) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).

  return 2; // VCC.
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
}

unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
  // In principle we do not need to reserve the SGPR pair used for flat_scratch
  // if we know flat instructions do not access the stack anywhere in the
  // program. For now assume it is needed if we have flat instructions.
  const bool KernelUsesFlatScratch = hasFlatAddressSpace();
  return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}

std::pair<unsigned, unsigned>
GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                               unsigned NumSGPRs, unsigned NumVGPRs) const {
  auto [MinOcc, MaxOcc] = getOccupancyWithWorkGroupSizes(LDSSize, F);
  unsigned SGPROcc = NumSGPRs ? getOccupancyWithNumSGPRs(NumSGPRs) : MaxOcc;
  unsigned VGPROcc =
      NumVGPRs ? getOccupancyWithNumVGPRs(NumVGPRs,
                                          AMDGPU::getDynamicVGPRBlockSize(F))
               : MaxOcc;

  // Maximum occupancy may be further limited by high SGPR/VGPR usage.
  MaxOcc = std::min(MaxOcc, std::min(SGPROcc, VGPROcc));
  return {std::min(MinOcc, MaxOcc), MaxOcc};
}
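// Worked example (illustrative numbers): if the workgroup-size bounds give
// {MinOcc, MaxOcc} = {4, 10} waves per EU, the SGPR count caps occupancy at 8,
// and the VGPR count caps it at 6, then MaxOcc = min(10, min(8, 6)) = 6 and
// the function returns {min(4, 6), 6} = {4, 6}.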

unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  // Compute the maximum number of SGPRs this function can use using the
  // default/requested minimum number of waves per execution unit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if a maximum number of SGPRs was explicitly requested using the
  // "amdgpu-num-sgpr" attribute.
  unsigned Requested =
      F.getFnAttributeAsParsedInteger("amdgpu-num-sgpr", MaxNumSGPRs);

  if (Requested != MaxNumSGPRs) {
    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second && Requested &&
        Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
}
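// Usage sketch (illustrative): a kernel can pin its SGPR budget with
//   attributes #0 = { "amdgpu-num-sgpr"="32" }
// Per the checks above, the request is raised to cover preloaded input SGPRs
// and dropped entirely if it does not cover the reserved SGPRs or conflicts
// with the requested waves-per-EU range.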

unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumSGPRs(MF.getFunction(), MFI.getWavesPerEU(),
                            MFI.getNumPreloadedSGPRs(),
                            getReservedNumSGPRs(MF));
}

unsigned GCNSubtarget::getMaxNumPreloadedSGPRs() const {
  using USI = GCNUserSGPRUsageInfo;
  // Max number of user SGPRs.
  const unsigned MaxUserSGPRs =
      USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
      USI::getNumUserSGPRForField(USI::DispatchPtrID) +
      USI::getNumUserSGPRForField(USI::QueuePtrID) +
      USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
      USI::getNumUserSGPRForField(USI::DispatchIdID) +
      USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
      USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);

  // Max number of system SGPRs.
  const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                                  1 + // WorkGroupIDY
                                  1 + // WorkGroupIDZ
                                  1 + // WorkGroupInfo
                                  1;  // private segment wave byte offset

  // Max number of synthetic SGPRs.
  const unsigned SyntheticSGPRs = 1; // LDSKernelId

  return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}

unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
  const auto [Min, Max] = NumVGPRBounds;

  // Check if a maximum number of VGPRs was explicitly requested using the
  // "amdgpu-num-vgpr" attribute.
  unsigned Requested = F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", Max);
  if (hasGFX90AInsts())
    Requested *= 2;

  // Make sure the requested value is inside the range of possible VGPR usage.
  return std::clamp(Requested, Min, Max);
}
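// Usage sketch (illustrative): "amdgpu-num-vgpr"="64" requests a 64-VGPR
// budget; on gfx90a-class subtargets (hasGFX90AInsts) the request is doubled
// above because vector registers are allocated out of the combined VGPR+AGPR
// register file there.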

unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
  unsigned DynamicVGPRBlockSize = AMDGPU::getDynamicVGPRBlockSize(F);
  // Temporarily check both the attribute and the subtarget feature, until the
  // latter is fully removed.
  if (DynamicVGPRBlockSize == 0 && isDynamicVGPREnabled())
    DynamicVGPRBlockSize = getDynamicVGPRBlockSize();

  std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
  return getBaseMaxNumVGPRs(
      F, {getMinNumVGPRs(Waves.second, DynamicVGPRBlockSize),
          getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  return getMaxNumVGPRs(MF.getFunction());
}

std::pair<unsigned, unsigned>
GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
  const unsigned MaxVectorRegs = getMaxNumVGPRs(F);

  unsigned MaxNumVGPRs = MaxVectorRegs;
  unsigned MaxNumAGPRs = 0;

  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
  // a wave may have up to 512 total vector registers combining both VGPRs and
  // AGPRs. Hence, in an entry function without calls and without AGPRs used
  // within it, it is possible to use the whole vector register budget for
  // VGPRs.
  //
  // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
  //       split the register file accordingly.
  if (hasGFX90AInsts()) {
    unsigned MinNumAGPRs = 0;
    const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
    const unsigned NumArchVGPRs = has1024AddressableVGPRs() ? 1024 : 256;

    const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};

    // TODO: The lower bound should probably force the number of required
    // registers up, overriding amdgpu-waves-per-eu.
    std::tie(MinNumAGPRs, MaxNumAGPRs) = AMDGPU::getIntegerPairAttribute(
        F, "amdgpu-agpr-alloc", DefaultNumAGPR, /*OnlyFirstRequired=*/
        true);

    if (MinNumAGPRs == DefaultNumAGPR.first) {
      // Default to splitting half the registers if AGPRs are required.
      MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
    } else {
      // Align to accum_offset's allocation granularity.
      MinNumAGPRs = alignTo(MinNumAGPRs, 4);

      MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
    }

    // Clamp values to be inbounds of our limits, and ensure min <= max.

    MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
    MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);

    MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, NumArchVGPRs);
    MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);

    assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
           MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= NumArchVGPRs &&
           "invalid register counts");
  } else if (hasMAIInsts()) {
    // On gfx908 the number of AGPRs always equals the number of VGPRs.
    MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
  }

  return std::pair(MaxNumVGPRs, MaxNumAGPRs);
}
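// Usage sketch (illustrative): with "amdgpu-agpr-alloc"="128" on a gfx90a-class
// subtarget, the minimum AGPR carve-out becomes 128 (already a multiple of the
// 4-register allocation granule), and the remaining vector-register budget is
// handed to VGPRs up to the architectural VGPR cap; with no attribute the
// budget is simply split in half.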

void GCNSubtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() || !Def->isInstr() ||
      !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

  if (DefI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
    MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
    unsigned Lat = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      if (I->modifiesRegister(Reg, TRI))
        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
      else if (Lat)
        --Lat;
    }
    Dep.setLatency(Lat);
  } else if (UseI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
    MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
      if (I->readsRegister(Reg, TRI))
        break;
      --Lat;
    }
    Dep.setLatency(Lat);
  } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
    // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
    // implicit operands which come from the MCInstrDesc, which can fool
    // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
    // def and use.
    Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
        DefI, DefOpIdx, UseI, UseOpIdx));
  }
}

unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX12)
    return 0; // Not MIMG encoding.

  if (NSAThreshold.getNumOccurrences() > 0)
    return std::max(NSAThreshold.getValue(), 2u);

  int Value = MF.getFunction().getFnAttributeAsParsedInteger(
      "amdgpu-nsa-threshold", -1);
  if (Value > 0)
    return std::max(Value, 2);

  return NSAThreshold;
}
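// Precedence implied by the logic above: GFX12+ always returns 0 (images no
// longer use the MIMG NSA encoding); otherwise an explicit
// -amdgpu-nsa-threshold=N flag wins, then the function's
// "amdgpu-nsa-threshold" attribute, then the flag's default of 2; positive
// values are clamped to at least 2 addresses.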

GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
                                           const GCNSubtarget &ST)
    : ST(ST) {
  const CallingConv::ID CC = F.getCallingConv();
  const bool IsKernel =
      CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;

  if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
    KernargSegmentPtr = true;

  bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    // FIXME: Can this always be disabled with < COv5?
    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
      (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
      // FlatScratchInit cannot be true for graphics CC if enableFlatScratch()
      // is false.
      (ST.enableFlatScratch() ||
       (!AMDGPU::isGraphics(CC) &&
        !F.hasFnAttribute("amdgpu-no-flat-scratch-init"))) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (hasImplicitBufferPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);

  if (hasPrivateSegmentBuffer())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);

  if (hasDispatchPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchPtrID);

  if (hasQueuePtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(QueuePtrID);

  if (hasKernargSegmentPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);

  if (hasDispatchID())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchIdID);

  if (hasFlatScratchInit())
    NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);

  if (hasPrivateSegmentSize())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}
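// Usage sketch (illustrative): a kernel carrying
//   attributes #0 = { "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
// skips the corresponding user SGPR inputs above, lowering NumUsedUserSGPRs
// and leaving more user SGPRs free for kernarg preloading.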

void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
  assert(NumKernargPreloadSGPRs + NumSGPRs <= AMDGPU::getMaxNumUserSGPRs(ST));
  NumKernargPreloadSGPRs += NumSGPRs;
  NumUsedUserSGPRs += NumSGPRs;
}

unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
  return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
