LLVM: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
26
27using namespace llvm;
28
29#define DEBUG_TYPE "hexagontti"
30
33
36 cl::desc("Allow auto-generation of HVX scatter-gather"));
37
40 cl::desc("Enable auto-vectorization of floatint point types on v68."));
41
44 cl::desc("Control lookup table emission on Hexagon target"));
45
48
49
50
51
53
54bool HexagonTTIImpl::useHVX() const {
56}
57
58bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
60 if (!VecTy)
61 return false;
62 if (!ST.isTypeForHVX(VecTy))
63 return false;
64 if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
65 return true;
67}
68
69unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
71 return VTy->getNumElements();
73 "Expecting scalar type");
74 return 1;
75}
76
83
84
90
94
95 if (L && L->isInnermost() && canPeel(L) &&
100 }
101}
102
108
109
110
112 bool Vector = ClassID == 1;
114 return useHVX() ? 32 : 0;
115 return 32;
116}
117
119 return useHVX() ? 2 : 1;
120}
121
124 switch (K) {
131 }
132
134}
135
137 return useHVX() ? ST.getVectorLength()*8 : 32;
138}
139
141 bool IsScalable) const {
142 assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
144}
145
151
155 if (ICA.getID() == Intrinsic::bswap) {
156 std::pair<InstructionCost, MVT> LT =
158 return LT.first + 2;
159 }
161}
162
165 const SCEV *S,
167 return 0;
168}
169
176 assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
177
179 return 1;
180
181 if (Opcode == Instruction::Store)
184
185 if (Src->isVectorTy()) {
188 if (isHVXVectorType(VecTy)) {
189 unsigned RegWidth =
192 assert(RegWidth && "Non-zero vector register width expected");
193
194 if (VecWidth % RegWidth == 0)
195 return VecWidth / RegWidth;
196
197 const Align RegAlign(RegWidth / 8);
198 if (Alignment > RegAlign)
199 Alignment = RegAlign;
200 unsigned AlignWidth = 8 * Alignment.value();
201 unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
202 return 3 * NumLoads;
203 }
204
205
206
207 unsigned Cost =
209
210
211 const Align BoundAlignment = std::min(Alignment, Align(8));
212 unsigned AlignWidth = 8 * BoundAlignment.value();
213 unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
214 if (Alignment == Align(4) || Alignment == Align(8))
215 return Cost * NumLoads;
216
218 unsigned LogA = Log2(BoundAlignment);
219 return (3 - LogA) * Cost * NumLoads;
220 }
221
223 OpInfo, I);
224}
225
234
238 bool UseMaskForCond, bool UseMaskForGaps) const {
239 if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
243 UseMaskForCond, UseMaskForGaps);
245}
246
252 if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
255 if (Opcode == Instruction::FCmp)
256 return LT.first + FloatFactor * getTypeNumElements(ValTy);
257 }
259 Op1Info, Op2Info, I);
260}
261
266
269 Op2Info, Args, CxtI);
270
271 if (Ty->isVectorTy()) {
272 if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
275 if (LT.second.isFloatingPoint())
276 return LT.first + FloatFactor * getTypeNumElements(Ty);
277 }
279 Args, CxtI);
280}
281
287 auto isNonHVXFP = [this] (Type *Ty) {
288 return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
289 };
290 if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
292
293 if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
294 unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
295 unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
296
300 std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
301
303 return Cost == 0 ? 0 : 1;
305 }
306 return 1;
307}
308
311 unsigned Index,
312 const Value *Op0,
313 const Value *Op1) const {
315 : Val;
316 if (Opcode == Instruction::InsertElement) {
317
318 unsigned Cost = (Index != 0) ? 2 : 0;
319 if (ElemTy->isIntegerTy(32))
321
323 Index, Op0, Op1);
324 }
325
326 if (Opcode == Instruction::ExtractElement)
327 return 2;
328
329 return 1;
330}
331
333 unsigned ,
335
336
338}
339
341 unsigned ,
343
344
346}
347
349
350 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
352 return false;
353
354 switch (Ty->getScalarSizeInBits()) {
355 case 8:
356 return (getTypeNumElements(Ty) == 128);
357 case 16:
358 if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
359 return (Alignment >= 2);
360 break;
361 case 32:
362 if (getTypeNumElements(Ty) == 32)
363 return (Alignment >= 4);
364 break;
365 default:
366 break;
367 }
368 return false;
369}
370
372 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
374 return false;
375
376 switch (Ty->getScalarSizeInBits()) {
377 case 8:
378 return (getTypeNumElements(Ty) == 128);
379 case 16:
380 if (getTypeNumElements(Ty) == 64)
381 return (Alignment >= 2);
382 break;
383 case 32:
384 if (getTypeNumElements(Ty) == 32)
385 return (Alignment >= 4);
386 break;
387 default:
388 break;
389 }
390 return false;
391}
392
394 Align Alignment) const {
396}
397
399 Align Alignment) const {
401}
402
403
404
406 return ST.getL1PrefetchDistance();
407}
408
410 return ST.getL1CacheLineSize();
411}
412
417 auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
418 if (!CI->isIntegerCast())
419 return false;
420
421
423 unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
424 unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
425 if (DBW != 32 || SBW >= DBW)
426 return false;
427
429
430
431
433 };
434
436 if (isCastFoldedIntoLoad(CI))
439}
440
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static const unsigned FloatFactor
Definition HexagonTargetTransformInfo.cpp:52
static cl::opt< bool > EnableV68FloatAutoHVX("force-hvx-float", cl::Hidden, cl::desc("Enable auto-vectorization of floatint point types on v68."))
cl::opt< bool > HexagonAllowScatterGatherHVX("hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden, cl::desc("Allow auto-generation of HVX scatter-gather"))
static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))
static cl::opt< bool > HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true), cl::Hidden, cl::desc("Enable masked loads/stores for HVX"))
static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))
This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine.
This pass exposes codegen information to IR-level passes.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
This is the base class for all instructions that perform data casts.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getFixed(ScalarTy MinVal)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition HexagonTargetTransformInfo.cpp:170
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) const override
Definition HexagonTargetTransformInfo.cpp:398
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const override
Definition HexagonTargetTransformInfo.cpp:332
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override
Definition HexagonTargetTransformInfo.cpp:140
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition HexagonTargetTransformInfo.cpp:282
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) const override
Definition HexagonTargetTransformInfo.cpp:393
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition HexagonTargetTransformInfo.cpp:235
unsigned getNumberOfRegisters(unsigned ClassID) const override
— Vector TTI begin —
Definition HexagonTargetTransformInfo.cpp:111
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *S, TTI::TargetCostKind CostKind) const override
Definition HexagonTargetTransformInfo.cpp:164
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition HexagonTargetTransformInfo.cpp:262
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override
Definition HexagonTargetTransformInfo.cpp:78
unsigned getMinVectorRegisterBitWidth() const override
Definition HexagonTargetTransformInfo.cpp:136
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const override
Definition HexagonTargetTransformInfo.cpp:340
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override
Definition HexagonTargetTransformInfo.cpp:348
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition HexagonTargetTransformInfo.cpp:123
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
Compute a cost of the given call instruction.
Definition HexagonTargetTransformInfo.cpp:147
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition HexagonTargetTransformInfo.cpp:91
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Definition HexagonTargetTransformInfo.cpp:309
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
Definition HexagonTargetTransformInfo.cpp:153
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition HexagonTargetTransformInfo.cpp:227
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
Bias LSR towards creating post-increment opportunities.
Definition HexagonTargetTransformInfo.cpp:104
bool shouldBuildLookupTables() const override
Definition HexagonTargetTransformInfo.cpp:441
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition HexagonTargetTransformInfo.cpp:118
bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override
Definition HexagonTargetTransformInfo.cpp:371
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition HexagonTargetTransformInfo.cpp:247
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
Definition HexagonTargetTransformInfo.cpp:414
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition HexagonTargetTransformInfo.cpp:85
unsigned getCacheLineSize() const override
Definition HexagonTargetTransformInfo.cpp:409
unsigned getPrefetchDistance() const override
— Vector TTI end —
Definition HexagonTargetTransformInfo.cpp:405
static InstructionCost getMax()
Type * getReturnType() const
Intrinsic::ID getID() const
An instruction for reading from memory.
Represents a single loop in the control flow graph.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
virtual const DataLayout & getDataLayout() const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_PostIndexed
Prefer post-indexed addressing mode.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
LLVM Value Representation.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Base class of all SIMD vector types.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
bool canPeel(const Loop *L)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
unsigned Log2(Align A)
Returns the log2 of the alignment.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...