LLVM: lib/Target/AMDGPU/AMDGPUSubtarget.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16
21
22namespace llvm {
23
29
31public:
46
47private:
49
50protected:
84
85public:
87
91
92
94
95
96
97
98
99
100
101
103
104
105
106
108 unsigned Dim) const;
109
110
111
112
113
114
115
116
117
118
119
120
122 bool REquiresUniformYZ = false) const;
123
124
125
126
127
128
129
130
131
132
134
135
136
137
138 std::pair<unsigned, unsigned>
140 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
141
142
143
144
145
146 std::pair<unsigned, unsigned>
147 getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
148 unsigned LDSBytes, const Function &F) const;
149
150
151
152
153
154 std::pair<unsigned, unsigned>
156 std::pair<unsigned, unsigned> FlatWorkGroupSizes,
157 unsigned LDSBytes) const;
158
159
160
163
164
165
166
167
168
169 std::pair<unsigned, unsigned>
173
174
175
176
179 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
180
181
182
183
184
185
186 std::pair<unsigned, unsigned>
188
192
196
200
202
206
207 bool isGCN() const { return TargetTriple.isAMDGCN(); }
208
212
216
217
219
220
221
222
223
224
225
227
229
231
235
237
241
243
245
247
251
255
257
261
265
269
273
277
281
285
289
293
297
301
305
309
313
317
318
319
320
321
325
326
327
328
329
333
334
335
336
338
342
343
344
346 switch (TargetTriple.getOS()) {
350 return 0;
352 default:
353
354
355 return 36;
356 }
357
359 }
360
361
362
364
365
367
368
370
371
372
373 virtual unsigned
375
376
377
379
380
381
383
384
385
387
388
390
391
393
394
396
397
398
402
403
404
406
408};
409
410}
411
412#endif
This file defines the SmallVector class.
bool hasFP8ConversionScaleInsts() const
Definition AMDGPUSubtarget.h:242
bool isMesa3DOS() const
Definition AMDGPUSubtarget.h:197
bool hasFminFmaxLegacy() const
Definition AMDGPUSubtarget.h:294
bool HasFminFmaxLegacy
Definition AMDGPUSubtarget.h:75
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool isAmdPalOS() const
Definition AMDGPUSubtarget.h:193
bool HasBF16TransInsts
Definition AMDGPUSubtarget.h:63
char WavefrontSizeLog2
Definition AMDGPUSubtarget.h:83
bool EnableRealTrue16Insts
Definition AMDGPUSubtarget.h:61
bool hasBF16PackedInsts() const
Definition AMDGPUSubtarget.h:236
bool hasSDWA() const
Definition AMDGPUSubtarget.h:270
bool hasFP4ConversionScaleInsts() const
Definition AMDGPUSubtarget.h:246
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
Definition AMDGPUSubtarget.h:339
bool hasMadMacF32Insts() const
Definition AMDGPUSubtarget.h:262
bool HasDsSrc2Insts
Definition AMDGPUSubtarget.h:68
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
Definition AMDGPUSubtarget.h:337
bool EnableD16Writes32BitVgpr
Definition AMDGPUSubtarget.h:62
bool hasSMulHi() const
Definition AMDGPUSubtarget.h:286
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool HasBF16PackedInsts
Definition AMDGPUSubtarget.h:65
bool HasTrue16BitInsts
Definition AMDGPUSubtarget.h:53
bool hasCvtPkF16F32Inst() const
Definition AMDGPUSubtarget.h:256
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition AMDGPUSubtarget.h:170
bool Has16BitInsts
Definition AMDGPUSubtarget.h:52
virtual unsigned getMinWavesPerEU() const =0
bool hasBF16ConversionInsts() const
Definition AMDGPUSubtarget.h:232
bool HasFP4ConversionScaleInsts
Definition AMDGPUSubtarget.h:56
bool hasFP6BF6ConversionScaleInsts() const
Definition AMDGPUSubtarget.h:248
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool HasFP6BF6ConversionScaleInsts
Definition AMDGPUSubtarget.h:57
Generation
Definition AMDGPUSubtarget.h:32
@ GFX10
Definition AMDGPUSubtarget.h:42
@ GFX9
Definition AMDGPUSubtarget.h:41
@ EVERGREEN
Definition AMDGPUSubtarget.h:36
@ GFX12
Definition AMDGPUSubtarget.h:44
@ INVALID
Definition AMDGPUSubtarget.h:33
@ R700
Definition AMDGPUSubtarget.h:35
@ SEA_ISLANDS
Definition AMDGPUSubtarget.h:39
@ NORTHERN_ISLANDS
Definition AMDGPUSubtarget.h:37
@ SOUTHERN_ISLANDS
Definition AMDGPUSubtarget.h:38
@ R600
Definition AMDGPUSubtarget.h:34
@ VOLCANIC_ISLANDS
Definition AMDGPUSubtarget.h:40
@ GFX11
Definition AMDGPUSubtarget.h:43
unsigned EUsPerCU
Definition AMDGPUSubtarget.h:79
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool hasBF8ConversionScaleInsts() const
Definition AMDGPUSubtarget.h:244
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
Definition AMDGPUSubtarget.h:322
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
Definition AMDGPUSubtarget.h:330
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
bool isGCN3Encoding() const
Definition AMDGPUSubtarget.h:209
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
bool hasMadMixInsts() const
Definition AMDGPUSubtarget.h:238
unsigned getWavefrontSizeLog2() const
Definition AMDGPUSubtarget.h:314
bool HasSMulHi
Definition AMDGPUSubtarget.h:73
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool HasF16BF16ToFP6BF6ConversionScaleInsts
Definition AMDGPUSubtarget.h:58
bool has16BitInsts() const
Definition AMDGPUSubtarget.h:213
bool HasF32ToF16BF16ConversionSRInsts
Definition AMDGPUSubtarget.h:60
bool GCN3Encoding
Definition AMDGPUSubtarget.h:51
bool FastFMAF32
Definition AMDGPUSubtarget.h:78
virtual ~AMDGPUSubtarget()=default
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
Definition AMDGPUSubtarget.h:218
bool isAmdHsaOrMesa(const Function &F) const
Definition AMDGPUSubtarget.h:203
unsigned LocalMemorySize
Definition AMDGPUSubtarget.h:81
unsigned MaxWavesPerEU
Definition AMDGPUSubtarget.h:80
bool HasMulU24
Definition AMDGPUSubtarget.h:72
bool hasFastFMAF32() const
Definition AMDGPUSubtarget.h:302
bool HasMulI24
Definition AMDGPUSubtarget.h:71
bool HasTrigReducedRange
Definition AMDGPUSubtarget.h:77
bool isPromoteAllocaEnabled() const
Definition AMDGPUSubtarget.h:306
bool hasTrigReducedRange() const
Definition AMDGPUSubtarget.h:298
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(Triple TT)
bool hasDsSrc2Insts() const
Definition AMDGPUSubtarget.h:266
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition AMDGPUSubtarget.h:345
bool hasF16BF16ToFP6BF6ConversionScaleInsts() const
Definition AMDGPUSubtarget.h:252
unsigned getMaxWavesPerEU() const
Definition AMDGPUSubtarget.h:382
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
bool hasMulU24() const
Definition AMDGPUSubtarget.h:282
bool HasInv2PiInlineImm
Definition AMDGPUSubtarget.h:74
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool EnablePromoteAlloca
Definition AMDGPUSubtarget.h:76
bool HasFP8ConversionScaleInsts
Definition AMDGPUSubtarget.h:54
unsigned AddressableLocalMemorySize
Definition AMDGPUSubtarget.h:82
bool isAmdHsaOS() const
Definition AMDGPUSubtarget.h:189
bool HasCvtPkF16F32Inst
Definition AMDGPUSubtarget.h:59
bool HasVOP3PInsts
Definition AMDGPUSubtarget.h:70
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
bool isGCN() const
Definition AMDGPUSubtarget.h:207
static const AMDGPUSubtarget & get(const MachineFunction &MF)
bool HasBF8ConversionScaleInsts
Definition AMDGPUSubtarget.h:55
bool HasSDWA
Definition AMDGPUSubtarget.h:69
bool HasMadMacF32Insts
Definition AMDGPUSubtarget.h:67
bool hasBF16TransInsts() const
Definition AMDGPUSubtarget.h:230
unsigned getWavefrontSize() const
Definition AMDGPUSubtarget.h:310
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
bool HasBF16ConversionInsts
Definition AMDGPUSubtarget.h:64
bool hasInv2PiInlineImm() const
Definition AMDGPUSubtarget.h:290
bool hasF32ToF16BF16ConversionSRInsts() const
Definition AMDGPUSubtarget.h:258
bool hasD16Writes32BitVgpr() const
bool HasMadMixInsts
Definition AMDGPUSubtarget.h:66
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the func...
bool hasVOP3PInsts() const
Definition AMDGPUSubtarget.h:274
bool hasMulI24() const
Definition AMDGPUSubtarget.h:278
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
This struct is a compact representation of a valid (non-zero power of two) alignment.