LLVM: lib/Target/AMDGPU/GCNSubtarget.cpp Source File

//===-- GCNSubtarget.cpp - GCN Subtarget Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the GCN specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "GCNSubtarget.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSelectionDAGInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DiagnosticInfo.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "gcn-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget
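// TableGen emits the generated ParseSubtargetFeatures() definition for a
// class named after the target ("AMDGPUSubtarget"), so that name is #defined
// to GCNSubtarget around the include above to splice the generated code into
// this class.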

static cl::opt<bool> EnablePowerSched(
    "amdgpu-enable-power-sched",
    cl::desc("Enable scheduling to minimize mAI power bursts"),
    cl::init(false));

static cl::opt<bool> EnableVGPRIndexMode(
    "amdgpu-vgpr-index-mode",
    cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
    cl::init(false));

static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
                           cl::desc("Enable the use of AA during codegen."),
                           cl::init(true));

static cl::opt<unsigned>
    NSAThreshold("amdgpu-nsa-threshold",
                 cl::desc("Number of addresses from which to enable MIMG NSA."),
                 cl::init(2), cl::Hidden);
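// These are ordinary cl::opt switches, so they can be toggled on any tool
// that runs AMDGPU codegen. Illustrative invocations (the flag names are
// defined above; the file and CPU names are placeholders):
//   llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-enable-power-sched kernel.ll
//   llc -mtriple=amdgcn -mcpu=gfx1030 -amdgpu-nsa-threshold=4 kernel.ll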

GCNSubtarget::~GCNSubtarget() = default;

GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                            StringRef GPU,
                                                            StringRef FS) {
  // Determine default and user-specified characteristics
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.
  //
  // Similarly we want enable-prt-strict-null to be on by default and not to
  // unset everything else if it is disabled

  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by
  // default.
  if (isAmdHsaOS())
    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

  // Disable mutually exclusive bits.
  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

  // Implement the "generic" processors, which acts as the default when no
  // generation features are enabled (e.g for -mcpu=''). HSA OS defaults to
  // the first amdgcn target that supports flat addressing. Other OSes default
  // to the first amdgcn target.

  if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
      hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    // Conflicting wave sizes were requested; drop one here so codegen can
    // proceed. The conflict is diagnosed per function in
    // checkSubtargetFeatures().
    ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
             !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    // If there is no default wave size it must be a generation before gfx10,
    // these have FeatureWavefrontSize64 in their definition already. For
    // gfx10+ with no wave size it is a bug to not have either of the features.
    ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  }

  // Targets must either support 64-bit offsets for MUBUF instructions, and/or
  // support flat operations, otherwise they cannot access a 64-bit global
  // address space.
  assert(hasAddr64() || hasFlat());

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
  // that do not support ADDR64 variants of MUBUF instructions. Such targets
  // cannot use a 64 bit offset with a MUBUF instruction to access the global
  // address space.
  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = true;
  }

  // Unless +-flat-for-global is specified, use MUBUF instructions for global
  // address space access if flat operations are not available.
  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn && LocalMemorySize == 0)
    LocalMemorySize = 32768;

  AddressableLocalMemorySize = LocalMemorySize;

  // In WGP mode on gfx10+, two CUs share local memory, so the available size
  // doubles unless CU mode was explicitly requested.
  if (AMDGPU::isGFX10Plus(*this) &&
      !getFeatureBits().test(AMDGPU::FeatureCuMode))
    LocalMemorySize *= 2;

  TargetID.setTargetIDFromFeaturesString(FS);

  LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
                    << TargetID.getXnackSetting() << '\n');
  LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                    << TargetID.getSramEccSetting() << '\n');

  return *this;
}

void GCNSubtarget::checkSubtargetFeatures(const Function &F) const {
  LLVMContext &Ctx = F.getContext();
  if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
      hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    Ctx.diagnose(DiagnosticInfoUnsupported(
        F, "must specify exactly one of wavefrontsize32 and wavefrontsize64"));
  }
}
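// For example, compiling a function with
// -mattr=+wavefrontsize32,+wavefrontsize64 leaves both features set and trips
// the diagnostic above, since the two wave sizes are mutually exclusive.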

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM)
    :
    AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
    AMDGPUSubtarget(TT),
    TargetTriple(TT),
    TargetID(*this),
    InstrItins(getInstrItineraryForCPU(GPU)),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
  EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);

  TSInfo = std::make_unique<AMDGPUSelectionDAGInfo>();

  CallLoweringInfo = std::make_unique<AMDGPUCallLowering>(*getTargetLowering());
  InlineAsmLoweringInfo =
      std::make_unique<InlineAsmLowering>(getTargetLowering());
  Legalizer = std::make_unique<AMDGPULegalizerInfo>(*this, TM);
  RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*this);
  InstSelector =
      std::make_unique<AMDGPUInstructionSelector>(*this, *RegBankInfo, TM);
}

const SelectionDAGTargetInfo *GCNSubtarget::getSelectionDAGInfo() const {
  return TSInfo.get();
}

unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  if (getGeneration() < GFX10)
    return 1;

  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  }

  return 2;
}
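// The constant bus is the read path VALU instructions use for SGPRs and
// literals. As an illustration, "v_add_f32_e64 v0, s0, s1" needs two scalar
// reads, so it is only encodable where the limit is 2 (GFX10+), while the
// 64-bit shifts listed above keep the single-read limit even there.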

/// This list was mostly derived from experimentation.
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:
    // These MAC/FMA forms changed behavior on gfx9; only Volcanic Islands
    // zeroes the high bits here.
    return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
  default:
    return false;
  }
}
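// Per the lists above, an opcode such as V_ADD_F16_e32 zeroes the high half
// of its 32-bit destination only on Volcanic Islands; on gfx9 and later the
// high 16 bits are preserved, so consumers of the full register cannot assume
// an implicit zero-extend.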

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit()
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us less
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
  if (isWave32()) {
    // Fix implicit $vcc operands after MIParser has verified that they match
    // the instruction definitions.
    for (auto &MBB : MF) {
      for (auto &MI : MBB)
        InstrInfo.fixImplicitOperands(MI);
    }
  }
}

bool GCNSubtarget::hasMadF16() const {
  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}

bool GCNSubtarget::useVGPRIndexMode() const {
  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}

bool GCNSubtarget::useAA() const { return UseAA; }

unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
                                                   getGeneration());
}

unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const {
  return AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs(this, NumVGPRs);
}

unsigned
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

  if (HasFlatScratch || HasArchitectedFlatScratch) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
}

unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
  // In principle we do not need to reserve SGPR pair used for flat_scratch if
  // we know flat instructions do not access the stack anywhere in the
  // program. For now assume it's needed if we have flat instructions.
  const bool KernelUsesFlatScratch = hasFlatAddressSpace();
  return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}

unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                                        unsigned NumSGPRs,
                                        unsigned NumVGPRs) const {
  unsigned Occupancy =
      std::min(getMaxWavesPerEU(), getOccupancyWithLocalMemSize(LDSSize, F));
  if (NumSGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
  if (NumVGPRs)
    Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
  return Occupancy;
}
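// Occupancy is capped by the scarcest resource: for instance, if LDS usage
// permits 8 waves per EU but the VGPR count only permits 4, this returns 4.
// Passing 0 for NumSGPRs or NumVGPRs skips that particular limit.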

unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested =
        F.getFnAttributeAsParsedInteger("amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second && Requested &&
        Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
}
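// The request consulted above comes from the "amdgpu-num-sgpr" function
// attribute, e.g. attributes #0 = { "amdgpu-num-sgpr"="48" } in IR (the value
// is illustrative). Requests below the reserved count or outside the
// waves-per-EU bounds are dropped (Requested = 0), so the subtarget default
// applies.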

unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumSGPRs(MF.getFunction(), MFI.getWavesPerEU(),
                            MFI.getNumPreloadedSGPRs(),
                            getReservedNumSGPRs(MF));
}

static unsigned getMaxNumPreloadedSGPRs() {
  using USI = GCNUserSGPRUsageInfo;
  // Max number of user SGPRs
  const unsigned MaxUserSGPRs =
      USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
      USI::getNumUserSGPRForField(USI::DispatchPtrID) +
      USI::getNumUserSGPRForField(USI::QueuePtrID) +
      USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
      USI::getNumUserSGPRForField(USI::DispatchIdID) +
      USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
      USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);

  // Max number of system SGPRs
  const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                                  1 + // WorkGroupIDY
                                  1 + // WorkGroupIDZ
                                  1 + // WorkGroupInfo
                                  1;  // private segment wave byte offset

  // Max number of synthetic SGPRs
  const unsigned SyntheticSGPRs = 1; // LDSKernelId

  return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}

unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
  return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
                            getReservedNumSGPRs(F));
}

unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested =
        F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", MaxNumVGPRs);

    if (hasGFX90AInsts())
      Requested *= 2;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second && Requested &&
        Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs;
}

unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
  return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  return getMaxNumVGPRs(MF.getFunction());
}

void GCNSubtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() || !Def->isInstr() ||
      !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

  if (DefI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
    MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
    unsigned Lat = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      if (I->modifiesRegister(Reg, TRI))
        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
      else if (Lat)
        --Lat;
    }
    Dep.setLatency(Lat);
  } else if (UseI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
    MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
      if (I->readsRegister(Reg, TRI))
        break;
      --Lat;
    }
    Dep.setLatency(Lat);
  } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
    // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
    // implicit operands which come from the MCInstrDesc, which can fool
    // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
    // def instead of part of the same def.
    Dep.setLatency(SchedModel->computeOperandLatency(DefI, DefOpIdx, UseI,
                                                     UseOpIdx));
  }
}

namespace {
struct FillMFMAShadowMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  ScheduleDAGMI *DAG;

  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}

  bool isSALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }

  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }

  // Link as many SALU instructions in chain as possible. Return the size
  // of the chain. Links up to MaxChain instructions.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
    SmallVector<SUnit *, 8> Worklist({To});
    unsigned Linked = 0;

    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
        if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
          ++Linked;

      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
            DAG->canAddEdge(SUv, SU))
          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
      }

      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ))
          Worklist.push_back(Succ);
      }
    }

    return Linked;
  }

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasMAIInsts())
      return;
    DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
    if (!TSchedModel || DAG->SUnits.empty())
      return;

    // Scan for MFMA long latency instructions and try to add a dependency
    // of available SALU instructions to fill the shadow of that MFMA
    // instruction.
    SmallPtrSet<SUnit *, 32> Visited;
    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
        continue;

      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;

      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
                 dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      // Find up to Lat independent scalar instructions as early as
      // possible such that they can be scheduled after this MFMA.
      for (; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
            !DAG->canAddEdge(&*LastSALU, &SU))
          continue;

        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
};
} // namespace
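// An MFMA result is unavailable for many cycles after issue; the mutation
// above adds artificial edges so that independent SALU work is scheduled into
// that shadow, keeping the scalar unit busy and, under
// -amdgpu-enable-power-sched, spreading out MAI power bursts.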

void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}

std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
  return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
                          : nullptr;
}

unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX12)
    return 0; // Not MIMG encoding.

  if (NSAThreshold.getNumOccurrences() > 0)
    return std::max(NSAThreshold.getValue(), 2u);

  int Value = MF.getFunction().getFnAttributeAsParsedInteger(
      "amdgpu-nsa-threshold", -1);
  if (Value > 0)
    return std::max(Value, 2);

  return NSAThreshold;
}
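// NSA ("non-sequential address") MIMG encodings let an image instruction take
// its address operands in scattered VGPRs instead of one contiguous tuple.
// With a threshold of 3, for example, two-address operations keep the packed
// form while three or more addresses may use NSA; GFX12 no longer uses the
// MIMG encoding at all, hence the early return of 0.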

GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
                                           const GCNSubtarget &ST)
    : ST(ST) {
  const CallingConv::ID CC = F.getCallingConv();
  const bool IsKernel =
      CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
    KernargSegmentPtr = true;

  bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    // FIXME: Can this always be disabled with < COv5?
    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
      (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (hasImplicitBufferPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);

  if (hasPrivateSegmentBuffer())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);

  if (hasDispatchPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchPtrID);

  if (hasQueuePtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(QueuePtrID);

  if (hasKernargSegmentPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);

  if (hasDispatchID())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchIdID);

  if (hasFlatScratchInit())
    NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);

  if (hasPrivateSegmentSize())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}

void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
  assert(NumKernargPreloadSGPRs + NumSGPRs <= AMDGPU::getMaxNumUserSGPRs(ST));
  NumKernargPreloadSGPRs += NumSGPRs;
  NumUsedUserSGPRs += NumSGPRs;
}

unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
  return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
