LLVM: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Source File

//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares the AMDGPU-specific subclasses of BasicTTIImplBase.
/// They use detailed subtarget information to answer certain
/// TargetTransformInfo queries, while letting the target-independent and
/// default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include <optional>

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  static constexpr bool InlinerVectorBonusPercent = 0;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others quarter.
  // This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;

public:
  GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence(const Function *F = nullptr) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const override;
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override;
  unsigned getMinVectorRegisterBitWidth() const override;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;

  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override;

  unsigned getMaxInterleaveFactor(ElementCount VF) const override;

  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost
  getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                 const Instruction *I = nullptr) const override;

  /// Analyze if the results of inline asm are divergent.
  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
  bool isSourceOfDivergence(const Value *V) const override;
  bool isAlwaysUniform(const Value *V) const override;

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    // Address space casts must cast between different address spaces.
    if (FromAS == ToAS)
      return false;
    // ... (remaining address-space checks not recovered in this capture)
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return AMDGPU::addrspacesMayAlias(AS0, AS1);
  }

  unsigned getFlatAddressSpace() const override {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;

  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
                                 const Value *Op1, InstCombiner &IC) const;

  /// Simplify a lane index operand (e.g. the source lane of
  /// llvm.amdgcn.readlane).
  bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,
                                   unsigned LaneAgIdx) const;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  Value *simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC,
                                             IntrinsicInst &II,
                                             const APInt &DemandedElts,
                                             APInt &UndefElts) const;

  Instruction *hoistLaneIntrinsicThroughOperand(InstCombiner &IC,
                                                IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  InstructionCost getVectorSplitCost() const { return 0; }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  /// Whether it is profitable to sink the operands of an Instruction I to
  /// the basic block of I.
  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  int getInliningLastCallToStaticBonus() const override;
  unsigned getInliningThresholdMultiplier() const override { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const override;
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override;

  int getInlinerVectorBonusPercent() const override {
    return InlinerVectorBonusPercent;
  }

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  /// Get intrinsic cost based on arguments.
  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  /// Try to calculate op costs for min/max reduction operations.
  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  /// Data cache line size for the LoopDataPrefetch pass. Has no use before
  /// GFX12.
  unsigned getCacheLineSize() const override;

  /// How far before a load we should place the prefetch instruction.
  unsigned getPrefetchDistance() const override;

  bool shouldPrefetchAddressSpace(unsigned AS) const override;

  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;

  enum class KnownIEEEMode { Unknown, On, Off };

  /// Return KnownIEEEMode::On if we know the use context can assume
  /// "amdgpu-ieee"="true", and KnownIEEEMode::Off if we can assume
  /// "amdgpu-ieee"="false".
  KnownIEEEMode fpenvIEEEMode(const Instruction &I) const;

  /// Account for loads of i8 vector types to have reduced cost. For example,
  /// the cost of loading 4 i8 values is the cost of loading a single i32.
  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  /// When counting parts on AMD GPUs, account for i8s being grouped together
  /// under a single i32 value. Otherwise fall back to the base implementation.
  unsigned getNumberOfParts(Type *Tp) const override;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
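The header above only declares hooks; passes reach them through the generic
TargetTransformInfo wrapper, which dispatches to GCNTTIImpl or AMDGPUTTIImpl
on amdgcn targets. A minimal sketch of that query path, assuming a standard
new-pass-manager setup (the probe pass itself is hypothetical):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical probe pass: sums the target's per-instruction throughput
// cost estimates for a function.
struct CostProbePass : PassInfoMixin<CostProbePass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    InstructionCost Total = 0;
    for (Instruction &I : instructions(F))
      Total += TTI.getInstructionCost(
          &I, TargetTransformInfo::TCK_RecipThroughput);
    // InstructionCost saturates; check validity before consuming it.
    if (Total.isValid())
      errs() << F.getName() << ": estimated cost " << Total << "\n";
    return PreservedAnalyses::all();
  }
};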

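Both TTI classes override getUnrollingPreferences and getPeelingPreferences,
which is how the target biases the generic loop unroller and peeler. A sketch
of how a loop pass consults them (the local names are ours):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Queries the target's unrolling and peeling preferences for one loop.
static void queryLoopPrefs(Loop *L, ScalarEvolution &SE,
                           const TargetTransformInfo &TTI,
                           OptimizationRemarkEmitter &ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  TargetTransformInfo::PeelingPreferences PP;
  TTI.getUnrollingPreferences(L, SE, UP, &ORE);
  TTI.getPeelingPreferences(L, SE, PP);
  // UP.Threshold is the cost budget the unroller may spend on this loop;
  // the AMDGPU implementation raises it for loops that access private or
  // local memory, for example.
  (void)UP.Threshold;
  (void)PP.PeelCount;
}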

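getMemoryOpCost and getNumberOfParts both model the fact that small integer
vectors are packed into 32-bit registers, so a <4 x i8> load should be priced
like one i32 load. A sketch of the comparison; the choice of address space 1
(global on AMDGPU) and alignment is an assumption of the example:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Compares the modeled cost of loading <4 x i8> against a plain i32 load.
static void compareLoadCosts(const TargetTransformInfo &TTI,
                             LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *I8x4 = FixedVectorType::get(Type::getInt8Ty(Ctx), 4);
  auto Kind = TargetTransformInfo::TCK_RecipThroughput;
  InstructionCost VecCost = TTI.getMemoryOpCost(
      Instruction::Load, I8x4, Align(4), /*AddressSpace=*/1, Kind);
  InstructionCost ScalarCost = TTI.getMemoryOpCost(
      Instruction::Load, I32, Align(4), /*AddressSpace=*/1, Kind);
  // With the i8-grouping rule, the two costs are expected to match.
  (void)VecCost;
  (void)ScalarCost;
}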

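The divergence hooks drive uniformity analysis: hasBranchDivergence gates it,
isSourceOfDivergence seeds it (workitem-id intrinsics, atomics, divergent
inline asm), and isAlwaysUniform overrides it (e.g. readfirstlane results).
A sketch of a direct query through the public wrapper:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Classifies a value from the target's point of view: a divergence seed,
// always uniform, or dependent on its operands.
static const char *classifyUniformity(const TargetTransformInfo &TTI,
                                      const Value *V) {
  if (!TTI.hasBranchDivergence())
    return "target has no divergence";
  if (TTI.isAlwaysUniform(V))
    return "always uniform";
  if (TTI.isSourceOfDivergence(V))
    return "source of divergence";
  return "uniformity depends on operands";
}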

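fpenvIEEEMode is target-internal, but the state it reports is carried by the
function-level "amdgpu-ieee" attribute. A rough illustration of that query;
it ignores the strictfp call sites and subtarget defaults the real
implementation also has to consider:

#include "llvm/IR/Function.h"

using namespace llvm;

enum class KnownIEEEMode { Unknown, On, Off };

// Reads the explicit "amdgpu-ieee" override, if any. The real
// GCNTTIImpl::fpenvIEEEMode answers for a specific use context, not just
// the enclosing function.
static KnownIEEEMode ieeeModeFromAttribute(const Function &F) {
  Attribute A = F.getFnAttribute("amdgpu-ieee");
  if (!A.isValid())
    return KnownIEEEMode::Unknown; // no per-function override present
  return A.getValueAsString() == "true" ? KnownIEEEMode::On
                                        : KnownIEEEMode::Off;
}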

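addrspacesMayAlias forwards to the AMDGPU::addrspacesMayAlias helper. The
intuition is that flat pointers can address several memories, while the named
spaces map to disjoint hardware storage. An illustrative, not authoritative,
sketch using the documented AMDGPU address-space numbering (the alias table
here is our simplification of the real rule):

#include <utility>

// Documented AMDGPU address-space numbering.
enum : unsigned {
  FLAT_ADDRESS = 0,     // generic pointer
  GLOBAL_ADDRESS = 1,   // off-chip global memory
  REGION_ADDRESS = 2,   // GDS
  LOCAL_ADDRESS = 3,    // LDS
  CONSTANT_ADDRESS = 4, // read-only, backed by global memory
  PRIVATE_ADDRESS = 5,  // per-lane scratch
};

static bool mayAliasSketch(unsigned AS0, unsigned AS1) {
  if (AS0 == AS1)
    return true;
  if (AS0 > AS1)
    std::swap(AS0, AS1);
  // Flat can point into global, constant, local, and private memory,
  // but not into GDS.
  if (AS0 == FLAT_ADDRESS)
    return AS1 != REGION_ADDRESS;
  // Constant memory is carved out of global memory.
  if (AS0 == GLOBAL_ADDRESS && AS1 == CONSTANT_ADDRESS)
    return true;
  // Remaining named spaces (e.g. LDS vs. scratch) are disjoint memories.
  return false;
}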
