AMDGPUSubtarget.h Source File (original) (raw)

14#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H

15#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H

22namespace llvm {

31public:

47private:

50protected:

85public:

100

101

103

104

105

106

108 unsigned Dim) const;

109

110

111

112

113

114

115

116

117

118

119

120

122 bool REquiresUniformYZ = false) const;

123

124

125

126

127

128

129

130

131

132

134

135

136

137

138 std::pair<unsigned, unsigned>

140 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;

141

142

143

144

145

146 std::pair<unsigned, unsigned>

147 getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,

148 unsigned LDSBytes, const Function &F) const;

149

150

151

152

153

154 std::pair<unsigned, unsigned>

156 std::pair<unsigned, unsigned> FlatWorkGroupSizes,

157 unsigned LDSBytes) const;

158

159

160

163

164

165

166

167

168

169 std::pair<unsigned, unsigned>

173

174

175

176

179 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;

180

181

182

183

184

185

186 std::pair<unsigned, unsigned>

188

192

196

200

202

206

207 bool isGCN() const { return TargetTriple.isAMDGCN(); }

208

212

216

217

219

220

221

222

223

224

225

227

229

231

235

237

241

243

245

247

251

255

257

261

265

269

273

277

281

285

289

293

297

301

305

309

313

317

318

319

320

321

325

326

327

328

329

333

334

335

336

338

342

343

344

346 switch (TargetTriple.getOS()) {

350 return 0;

352 default:

353

354

355 return 36;

356 }

357

359 }

360

361

362

364

365

367

368

370

371

372

373 virtual unsigned

375

376

377

379

380

381

383

384

385

387

388

390

391

393

394

396

397

398

402

403

404

406

408};

409

410}

411

412#endif

This file defines the SmallVector class.

bool hasFP8ConversionScaleInsts() const

Definition AMDGPUSubtarget.h:242

bool isMesa3DOS() const

Definition AMDGPUSubtarget.h:197

bool hasFminFmaxLegacy() const

Definition AMDGPUSubtarget.h:294

bool HasFminFmaxLegacy

Definition AMDGPUSubtarget.h:75

std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const

bool isAmdPalOS() const

Definition AMDGPUSubtarget.h:193

bool HasBF16TransInsts

Definition AMDGPUSubtarget.h:63

char WavefrontSizeLog2

Definition AMDGPUSubtarget.h:83

bool EnableRealTrue16Insts

Definition AMDGPUSubtarget.h:61

bool hasBF16PackedInsts() const

Definition AMDGPUSubtarget.h:236

bool hasSDWA() const

Definition AMDGPUSubtarget.h:270

bool hasFP4ConversionScaleInsts() const

Definition AMDGPUSubtarget.h:246

std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const

Align getAlignmentForImplicitArgPtr() const

Definition AMDGPUSubtarget.h:339

bool hasMadMacF32Insts() const

Definition AMDGPUSubtarget.h:262

bool HasDsSrc2Insts

Definition AMDGPUSubtarget.h:68

unsigned getEUsPerCU() const

Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...

Definition AMDGPUSubtarget.h:337

bool EnableD16Writes32BitVgpr

Definition AMDGPUSubtarget.h:62

bool hasSMulHi() const

Definition AMDGPUSubtarget.h:286

bool isMesaKernel(const Function &F) const

std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const

bool HasBF16PackedInsts

Definition AMDGPUSubtarget.h:65

bool HasTrue16BitInsts

Definition AMDGPUSubtarget.h:53

bool hasCvtPkF16F32Inst() const

Definition AMDGPUSubtarget.h:256

bool useRealTrue16Insts() const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...

std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const

Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...

Definition AMDGPUSubtarget.h:170

bool Has16BitInsts

Definition AMDGPUSubtarget.h:52

virtual unsigned getMinWavesPerEU() const =0

bool hasBF16ConversionInsts() const

Definition AMDGPUSubtarget.h:232

bool HasFP4ConversionScaleInsts

Definition AMDGPUSubtarget.h:56

bool hasFP6BF6ConversionScaleInsts() const

Definition AMDGPUSubtarget.h:248

std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const

bool HasFP6BF6ConversionScaleInsts

Definition AMDGPUSubtarget.h:57

Generation

Definition AMDGPUSubtarget.h:32

@ GFX10

Definition AMDGPUSubtarget.h:42

@ GFX9

Definition AMDGPUSubtarget.h:41

@ EVERGREEN

Definition AMDGPUSubtarget.h:36

@ GFX12

Definition AMDGPUSubtarget.h:44

@ INVALID

Definition AMDGPUSubtarget.h:33

@ R700

Definition AMDGPUSubtarget.h:35

@ SEA_ISLANDS

Definition AMDGPUSubtarget.h:39

@ NORTHERN_ISLANDS

Definition AMDGPUSubtarget.h:37

@ SOUTHERN_ISLANDS

Definition AMDGPUSubtarget.h:38

@ R600

Definition AMDGPUSubtarget.h:34

@ VOLCANIC_ISLANDS

Definition AMDGPUSubtarget.h:40

@ GFX11

Definition AMDGPUSubtarget.h:43

unsigned EUsPerCU

Definition AMDGPUSubtarget.h:79

bool makeLIDRangeMetadata(Instruction *I) const

Creates value range metadata on a workitemid.* intrinsic call or load.

bool hasBF8ConversionScaleInsts() const

Definition AMDGPUSubtarget.h:244

unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const

Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.

unsigned getImplicitArgNumBytes(const Function &F) const

unsigned getLocalMemorySize() const

Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.

Definition AMDGPUSubtarget.h:322

unsigned getAddressableLocalMemorySize() const

Return the maximum number of bytes of LDS that can be allocated to a single workgroup.

Definition AMDGPUSubtarget.h:330

SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const

Return the maximum number of work groups for the function.

bool isGCN3Encoding() const

Definition AMDGPUSubtarget.h:209

virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0

virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0

bool hasMadMixInsts() const

Definition AMDGPUSubtarget.h:238

unsigned getWavefrontSizeLog2() const

Definition AMDGPUSubtarget.h:314

bool HasSMulHi

Definition AMDGPUSubtarget.h:73

unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const

bool HasF16BF16ToFP6BF6ConversionScaleInsts

Definition AMDGPUSubtarget.h:58

bool has16BitInsts() const

Definition AMDGPUSubtarget.h:213

bool HasF32ToF16BF16ConversionSRInsts

Definition AMDGPUSubtarget.h:60

bool GCN3Encoding

Definition AMDGPUSubtarget.h:51

bool FastFMAF32

Definition AMDGPUSubtarget.h:78

virtual ~AMDGPUSubtarget()=default

bool hasTrue16BitInsts() const

Return true if the subtarget supports True16 instructions.

Definition AMDGPUSubtarget.h:218

bool isAmdHsaOrMesa(const Function &F) const

Definition AMDGPUSubtarget.h:203

unsigned LocalMemorySize

Definition AMDGPUSubtarget.h:81

unsigned MaxWavesPerEU

Definition AMDGPUSubtarget.h:80

bool HasMulU24

Definition AMDGPUSubtarget.h:72

bool hasFastFMAF32() const

Definition AMDGPUSubtarget.h:302

bool HasMulI24

Definition AMDGPUSubtarget.h:71

bool HasTrigReducedRange

Definition AMDGPUSubtarget.h:77

bool isPromoteAllocaEnabled() const

Definition AMDGPUSubtarget.h:306

bool hasTrigReducedRange() const

Definition AMDGPUSubtarget.h:298

AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const

unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

virtual unsigned getMaxFlatWorkGroupSize() const =0

AMDGPUSubtarget(Triple TT)

bool hasDsSrc2Insts() const

Definition AMDGPUSubtarget.h:266

unsigned getExplicitKernelArgOffset() const

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

Definition AMDGPUSubtarget.h:345

bool hasF16BF16ToFP6BF6ConversionScaleInsts() const

Definition AMDGPUSubtarget.h:252

unsigned getMaxWavesPerEU() const

Definition AMDGPUSubtarget.h:382

bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const

bool hasMulU24() const

Definition AMDGPUSubtarget.h:282

bool HasInv2PiInlineImm

Definition AMDGPUSubtarget.h:74

uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const

bool EnablePromoteAlloca

Definition AMDGPUSubtarget.h:76

bool HasFP8ConversionScaleInsts

Definition AMDGPUSubtarget.h:54

unsigned AddressableLocalMemorySize

Definition AMDGPUSubtarget.h:82

bool isAmdHsaOS() const

Definition AMDGPUSubtarget.h:189

bool HasCvtPkF16F32Inst

Definition AMDGPUSubtarget.h:59

bool HasVOP3PInsts

Definition AMDGPUSubtarget.h:70

bool isSingleLaneExecution(const Function &Kernel) const

Return true if only a single workitem can be active in a wave.

bool isGCN() const

Definition AMDGPUSubtarget.h:207

static const AMDGPUSubtarget & get(const MachineFunction &MF)

bool HasBF8ConversionScaleInsts

Definition AMDGPUSubtarget.h:55

bool HasSDWA

Definition AMDGPUSubtarget.h:69

bool HasMadMacF32Insts

Definition AMDGPUSubtarget.h:67

bool hasBF16TransInsts() const

Definition AMDGPUSubtarget.h:230

unsigned getWavefrontSize() const

Definition AMDGPUSubtarget.h:310

virtual unsigned getMinFlatWorkGroupSize() const =0

std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const

Returns the target minimum/maximum number of waves per EU.

bool HasBF16ConversionInsts

Definition AMDGPUSubtarget.h:64

bool hasInv2PiInlineImm() const

Definition AMDGPUSubtarget.h:290

bool hasF32ToF16BF16ConversionSRInsts() const

Definition AMDGPUSubtarget.h:258

bool hasD16Writes32BitVgpr() const

bool HasMadMixInsts

Definition AMDGPUSubtarget.h:66

std::pair< unsigned, unsigned > getWavesPerEU(const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const

Overload which uses the specified values for the flat work group sizes, rather than querying the func...

bool hasVOP3PInsts() const

Definition AMDGPUSubtarget.h:274

bool hasMulI24() const

Definition AMDGPUSubtarget.h:278

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Primary interface to the complete machine description for the target machine.

Triple - Helper class for working with autoconf configuration names.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

This struct is a compact representation of a valid (non-zero power of two) alignment.