LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18
27#include <cstdint>
28#include <optional>
29
30namespace llvm {
31
41
45
46 friend BaseT;
47
50
52
55
/// Kinds of memory intrinsic, told apart by element count (two, three or
/// four). NOTE(review): presumably the vector structured load/store
/// intrinsics -- the users of this enum are not visible here, confirm there.
56 enum MemIntrinsicType {
57 VECTOR_LDST_TWO_ELEMENTS,
58 VECTOR_LDST_THREE_ELEMENTS,
59 VECTOR_LDST_FOUR_ELEMENTS
60 };
61
62
63
64
65 Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
67 Type *SrcOverrideTy = nullptr) const;
68
69
70 bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,
72 Type *SrcOverrideTy = nullptr) const;
73
74
75
76
77
78
79
80
84 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
85
86public:
88 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
89 TLI(ST->getTargetLowering()) {}
90
92 const Function *Callee) const override;
93
96
98 unsigned DefaultCallPenalty) const override;
99
101
103
104
105
106
114 Instruction *Inst = nullptr) const override;
119
120
121
122
123
124
126
128 return ST->hasSVE();
129 }
130
132 bool Vector = (ClassID == 1);
134 if (ST->hasNEON())
135 return 32;
136 return 0;
137 }
138 return 31;
139 }
140
144
145 std::optional<Instruction *>
147
150 APInt &UndefElts2, APInt &UndefElts3,
152 SimplifyAndSetOp) const override;
153
156
158 return ST->getMinVectorRegisterBitWidth();
159 }
160
162 return ST->getVScaleForTuning();
163 }
164
166
169
170
171
172
173
180
182
184
185
186
188 unsigned Opcode2) const;
189
193
196
199
201 Type *Src) const;
202
206 const Instruction *I = nullptr) const override;
207
210 unsigned Index,
212
214 const Instruction *I = nullptr) const override;
215
218 unsigned Index, const Value *Op0,
219 const Value *Op1) const override;
220
221
222
223
224
227 unsigned Index, Value *Scalar,
228 ArrayRef<std::tuple<Value *, User *, int>>
229 ScalarUserAndIdx) const override;
230
233 unsigned Index) const override;
234
238 unsigned Index) const override;
239
243
247
250
256 const Instruction *CxtI = nullptr) const override;
257
261
267 const Instruction *I = nullptr) const override;
268
272
274 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
277 const Instruction *I = nullptr) const override;
278
281
284 OptimizationRemarkEmitter *ORE) const override;
285
288
291 bool CanCreate = true) const override;
292
294 MemIntrinsicInfo &Info) const override;
295
297 if (Ty->isPointerTy())
298 return true;
299
300 if (Ty->isBFloatTy() && ST->hasBF16())
301 return true;
302
303 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
304 return true;
305
306 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
307 Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
308 return true;
309
310 return false;
311 }
312
314 if (!ST->isSVEorStreamingSVEAvailable())
315 return false;
316
317
319 DataType->getPrimitiveSizeInBits() != 128)
320 return false;
321
323 }
324
330
336
338 return Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isIntegerTy(32) ||
339 Ty->isIntegerTy(64);
340 }
341
343 Align Alignment) const override {
344 if (!ST->isSVEAvailable())
345 return false;
346
348 DataType->getPrimitiveSizeInBits() < 128)
349 return false;
350
352 }
353
355 if (!ST->isSVEAvailable())
356 return false;
357
358
360 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
361 DataTypeFVTy->getNumElements() < 2))
362 return false;
363
365 }
366
370
374
377
378 if (!ST->hasNEON() || NumElements.isScalable())
379 return false;
381 case 8:
382 case 16:
383 case 32:
384 case 64: {
385
386 unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
387 return VectorBits >= 64;
388 }
389 }
390 return false;
391 }
392
394
395
396
397
398
399
400
402 unsigned NumElements = DataTypeTy->getNumElements();
403 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
404 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
406 }
408 }
409
413
415
416 if (ST->isLittleEndian())
419 }
420
422 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
426
428
432 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
433
436 bool &AllowPromotionWithoutCommonHeader) const override;
437
441
443
445 return ST->hasSVE() ? 5 : 0;
446 }
447
450 if (ST->hasSVE())
451 return IVUpdateMayOverflow
454
456 }
457
459
461
463
465 return ST->isSVEorStreamingSVEAvailable();
466 }
467
469
472
474
475
476
481
484 std::optional<FastMathFlags> FMF,
486
489 VectorType *ValTy, std::optional<FastMathFlags> FMF,
491
493 bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,
495
500 const Instruction *CxtI = nullptr) const override;
501
503 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
506
507
508
509
510
511
513 StackOffset BaseOffset, bool HasBaseReg,
514 int64_t Scale,
515 unsigned AddrSpace) const override;
516
518 return ST->enableSelectOptimize();
519 }
520
522
524 Type *ScalarValTy) const override {
525
527 return 4;
528
530 }
531
/// Reports the minimum page size for this target; always the constant 4096.
/// NOTE(review): presumably a size in bytes -- confirm against the base-class
/// declaration this member overrides.
532 std::optional<unsigned> getMinPageSize() const override { return 4096; }
533
536
539
540};
541
542}
543
544#endif
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
uint64_t IntrinsicInst * II
This pass exposes codegen information to IR-level passes.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalNTLoad(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:414
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override
Definition AArch64TargetTransformInfo.h:449
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Definition AArch64TargetTransformInfo.h:165
bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const
Definition AArch64TargetTransformInfo.h:393
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
Definition AArch64TargetTransformInfo.h:375
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
bool prefersVectorizedAddressing() const override
unsigned getMinTripCountTailFoldingThreshold() const override
Definition AArch64TargetTransformInfo.h:444
std::optional< unsigned > getVScaleForTuning() const override
Definition AArch64TargetTransformInfo.h:161
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:371
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override
Definition AArch64TargetTransformInfo.h:331
bool supportsScalableVectors() const override
Definition AArch64TargetTransformInfo.h:464
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
bool enableSelectOptimize() const override
Definition AArch64TargetTransformInfo.h:517
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
Definition AArch64TargetTransformInfo.h:296
bool preferPredicatedReductionSelect() const override
Definition AArch64TargetTransformInfo.h:473
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition AArch64TargetTransformInfo.h:131
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override
Definition AArch64TargetTransformInfo.h:325
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool isLegalNTStore(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:410
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
std::optional< unsigned > getMinPageSize() const override
Definition AArch64TargetTransformInfo.h:532
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
Definition AArch64TargetTransformInfo.h:313
unsigned getMinVectorRegisterBitWidth() const override
Definition AArch64TargetTransformInfo.h:157
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
bool isElementTypeLegalForCompressStore(Type *Ty) const
Definition AArch64TargetTransformInfo.h:337
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
Definition AArch64TargetTransformInfo.h:87
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
Definition AArch64TargetTransformInfo.h:174
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool enableOrderedReductions() const override
Definition AArch64TargetTransformInfo.h:427
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
bool enableInterleavedAccessVectorization() const override
Definition AArch64TargetTransformInfo.h:125
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
Definition AArch64TargetTransformInfo.h:354
unsigned getGISelRematGlobalCost() const override
Definition AArch64TargetTransformInfo.h:442
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
Definition AArch64TargetTransformInfo.h:438
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:367
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
bool enableMaskedInterleavedAccessVectorization() const override
Definition AArch64TargetTransformInfo.h:127
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:342
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
Definition AArch64TargetTransformInfo.h:523
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
Information for memory intrinsic cost model.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
virtual const DataLayout & getDataLayout() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
CastContextHint
Represents a hint about the context in which a cast is used.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
ArrayRef(const T &OneElt) -> ArrayRef< T >
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Returns options for expansion of memcmp. IsZeroCmp is.
Parameters that control the generic loop unrolling transformation.