LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18
27#include
28#include
29
30namespace llvm {
31
41
45
46 friend BaseT;
47
50
52
55
56 enum MemIntrinsicType { // Three categories of memory intrinsic; the names suggest vector load/store of 2, 3 or 4 elements -- TODO confirm against the users of this enum.
57 VECTOR_LDST_TWO_ELEMENTS, // NOTE(review): presumably the two-element vector load/store case -- confirm.
58 VECTOR_LDST_THREE_ELEMENTS, // NOTE(review): presumably the three-element vector load/store case -- confirm.
59 VECTOR_LDST_FOUR_ELEMENTS // NOTE(review): presumably the four-element vector load/store case -- confirm.
60 };
61
62
63
64
65 Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
67 Type *SrcOverrideTy = nullptr) const;
68
69
70 bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,
72 Type *SrcOverrideTy = nullptr) const;
73
74
75
76
77
78
79
80
84 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
85
86public:
88 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
89 TLI(ST->getTargetLowering()) {}
90
92 const Function *Callee) const override;
93
96
98 unsigned DefaultCallPenalty) const override;
99
102
104
105
106
107
115 Instruction *Inst = nullptr) const override;
120
121
122
123
124
125
127
129 return ST->hasSVE();
130 }
131
133 bool Vector = (ClassID == 1);
135 if (ST->hasNEON())
136 return 32;
137 return 0;
138 }
139 return 31;
140 }
141
145
146 std::optional<Instruction *>
148
151 APInt &UndefElts2, APInt &UndefElts3,
153 SimplifyAndSetOp) const override;
154
157
159 return ST->getMinVectorRegisterBitWidth();
160 }
161
163 return ST->getVScaleForTuning();
164 }
165
167
170
171
172
173
174
181
183
185
186
187
189 unsigned Opcode2) const;
190
194
197
200
202 Type *Src) const;
203
207 const Instruction *I = nullptr) const override;
208
211 unsigned Index,
213
215 const Instruction *I = nullptr) const override;
216
219 unsigned Index, const Value *Op0,
220 const Value *Op1) const override;
221
222
223
224
225
228 unsigned Index, Value *Scalar,
229 ArrayRef<std::tuple<Value *, User *, int>>
230 ScalarUserAndIdx) const override;
231
234 unsigned Index) const override;
235
239 unsigned Index) const override;
240
244
248
251
257 const Instruction *CxtI = nullptr) const override;
258
262
268 const Instruction *I = nullptr) const override;
269
273
275 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
278 const Instruction *I = nullptr) const override;
279
282
285 OptimizationRemarkEmitter *ORE) const override;
286
289
292 bool CanCreate = true) const override;
293
295 MemIntrinsicInfo &Info) const override;
296
298 if (Ty->isPointerTy())
299 return true;
300
301 if (Ty->isBFloatTy() && ST->hasBF16())
302 return true;
303
304 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
305 return true;
306
307 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
308 Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
309 return true;
310
311 return false;
312 }
313
315 if (!ST->isSVEorStreamingSVEAvailable())
316 return false;
317
318
320 DataType->getPrimitiveSizeInBits() != 128)
321 return false;
322
324 }
325
331
337
339 return Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isIntegerTy(32) ||
340 Ty->isIntegerTy(64);
341 }
342
344 Align Alignment) const override {
345 if (!ST->isSVEAvailable())
346 return false;
347
349 DataType->getPrimitiveSizeInBits() < 128)
350 return false;
351
353 }
354
356 if (!ST->isSVEAvailable())
357 return false;
358
359
361 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
362 DataTypeFVTy->getNumElements() < 2))
363 return false;
364
366 }
367
371
375
378
379 if (!ST->hasNEON() || NumElements.isScalable())
380 return false;
382 case 8:
383 case 16:
384 case 32:
385 case 64: {
386
387 unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
388 return VectorBits >= 64;
389 }
390 }
391 return false;
392 }
393
395
396
397
398
399
400
401
403 unsigned NumElements = DataTypeTy->getNumElements();
404 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
405 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
407 }
409 }
410
414
416
417 if (ST->isLittleEndian())
420 }
421
423 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
427
429
433 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
434
437 bool &AllowPromotionWithoutCommonHeader) const override;
438
442
444
446 return ST->hasSVE() ? 5 : 0;
447 }
448
451 if (ST->hasSVE())
452 return IVUpdateMayOverflow
455
457 }
458
460
462
464
466 return ST->isSVEorStreamingSVEAvailable();
467 }
468
470
473
475
476
477
482
485 std::optional FMF,
487
490 VectorType *ValTy, std::optional FMF,
492
494 bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,
496
501 const Instruction *CxtI = nullptr) const override;
502
504 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
507
508
509
510
511
512
514 StackOffset BaseOffset, bool HasBaseReg,
515 int64_t Scale,
516 unsigned AddrSpace) const override;
517
519 return ST->enableSelectOptimize();
520 }
521
523
525 Type *ScalarValTy) const override {
526
528 return 4;
529
531 }
532
533 std::optional getMinPageSize() const override { return 4096; }
534
537
540
541};
542
543}
544
545#endif
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
uint64_t IntrinsicInst * II
This pass exposes codegen information to IR-level passes.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalNTLoad(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:415
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override
Definition AArch64TargetTransformInfo.h:450
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Definition AArch64TargetTransformInfo.h:166
bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const
Definition AArch64TargetTransformInfo.h:394
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
Definition AArch64TargetTransformInfo.h:376
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
bool prefersVectorizedAddressing() const override
unsigned getMinTripCountTailFoldingThreshold() const override
Definition AArch64TargetTransformInfo.h:445
std::optional< unsigned > getVScaleForTuning() const override
Definition AArch64TargetTransformInfo.h:162
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:372
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override
Definition AArch64TargetTransformInfo.h:332
bool supportsScalableVectors() const override
Definition AArch64TargetTransformInfo.h:465
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
bool enableSelectOptimize() const override
Definition AArch64TargetTransformInfo.h:518
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
Definition AArch64TargetTransformInfo.h:297
bool preferPredicatedReductionSelect() const override
Definition AArch64TargetTransformInfo.h:474
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition AArch64TargetTransformInfo.h:132
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override
Definition AArch64TargetTransformInfo.h:326
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool isLegalNTStore(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:411
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
std::optional< unsigned > getMinPageSize() const override
Definition AArch64TargetTransformInfo.h:533
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
Definition AArch64TargetTransformInfo.h:314
unsigned getMinVectorRegisterBitWidth() const override
Definition AArch64TargetTransformInfo.h:158
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
bool isElementTypeLegalForCompressStore(Type *Ty) const
Definition AArch64TargetTransformInfo.h:338
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
Definition AArch64TargetTransformInfo.h:87
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimated cost factor that can be used as a multiplier when scalarizing an operation ...
Definition AArch64TargetTransformInfo.h:175
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool enableOrderedReductions() const override
Definition AArch64TargetTransformInfo.h:428
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
bool enableInterleavedAccessVectorization() const override
Definition AArch64TargetTransformInfo.h:126
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
Definition AArch64TargetTransformInfo.h:355
unsigned getGISelRematGlobalCost() const override
Definition AArch64TargetTransformInfo.h:443
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
Definition AArch64TargetTransformInfo.h:439
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:368
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
bool enableMaskedInterleavedAccessVectorization() const override
Definition AArch64TargetTransformInfo.h:128
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const override
Definition AArch64TargetTransformInfo.h:343
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
Definition AArch64TargetTransformInfo.h:524
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
Information for memory intrinsic cost model.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
virtual const DataLayout & getDataLayout() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
CastContextHint
Represents a hint about the context in which a cast is used.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
ArrayRef(const T &OneElt) -> ArrayRef< T >
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Parameters that control the generic loop unrolling transformation.