LLVM: lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
16
18using namespace llvm;
19
20#define DEBUG_TYPE "wasmtti"
21
27
30
31
32 bool Vector = (ClassID == 1);
34 Result = std::max(Result, 16u);
35
36 return Result;
37}
38
41 switch (K) {
48 }
49
51}
52
57
60 Opcode, Ty, CostKind, Op1Info, Op2Info);
61
63 switch (Opcode) {
64 case Instruction::LShr:
65 case Instruction::AShr:
66 case Instruction::Shl:
67
68
69
76 break;
77 }
78 }
80}
81
85 int ISD = TLI->InstructionOpcodeToISD(Opcode);
86 auto SrcTy = TLI->getValueType(DL, Src);
87 auto DstTy = TLI->getValueType(DL, Dst);
88
89 if (!SrcTy.isSimple() || !DstTy.isSimple()) {
91 }
92
93 if (!ST->hasSIMD128()) {
95 }
96
97 auto DstVT = DstTy.getSimpleVT();
98 auto SrcVT = SrcTy.getSimpleVT();
99
100 if (I && I->hasOneUser()) {
102 int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());
103
104
107
108 if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
109 (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
110 (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
111 return 0;
112 }
113
114
115 if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
116 (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
117 return 1;
118 }
119 }
120 }
121
123
130
135
142
147
152
155
157
160
162
167
172
177
182
187
192
195
198 };
199
200 if (const auto *Entry =
202 return Entry->Cost;
203 }
204
206}
207
211
212 Options.AllowOverlappingLoads = true;
213
214 if (ST->hasSIMD128())
215 Options.LoadSizes.push_back(16);
216
217 Options.LoadSizes.append({8, 4, 2, 1});
218 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
220
222}
223
231 }
232
233 EVT VT = TLI->getValueType(DL, Ty, true);
234
235 if (VT == MVT::Other)
238
240 if (!LT.first.isValid())
242
243 int ISD = TLI->InstructionOpcodeToISD(Opcode);
245 if (ISD == ISD::LOAD) {
246
247
248
249 switch (width) {
250 default:
251 break;
252 case 32:
253 case 64:
254 case 128:
255 return 2;
256 }
257 } else if (ISD == ISD::STORE) {
258
259 switch (width) {
260 default:
261 break;
262 case 8:
263 case 16:
264 case 32:
265 case 64:
266 case 128:
267 return 2;
268 }
269 }
270
272}
273
277 bool UseMaskForCond, bool UseMaskForGaps) const {
278 assert(Factor >= 2 && "Invalid interleave factor");
279
283 }
284
285 if (UseMaskForCond || UseMaskForGaps)
288 UseMaskForCond, UseMaskForGaps);
289
292 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
293
294 if (MinElts < 2 || MinElts % Factor != 0)
296
297 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
298
299 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
301
302 auto *SubVecTy =
304 VecTy->getElementCount().divideCoefficientBy(Factor));
307
308 unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
310 unsigned NumAccesses =
311 std::max(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);
312
313
314
315
316
317
318
319 static const CostTblEntry ShuffleCostTbl[] = {
320
321 {2, MVT::v2i8, 1},
322 {2, MVT::v4i8, 1},
323 {2, MVT::v8i8, 1},
324 {2, MVT::v2i16, 1},
325 {2, MVT::v4i16, 1},
326 {2, MVT::v2i32, 1},
327
328
329 {2, MVT::v16i8, 2},
330 {2, MVT::v8i16, 2},
331 {2, MVT::v4i32, 2},
332
333
334 {4, MVT::v2i8, 4},
335 {4, MVT::v4i8, 4},
336 {4, MVT::v2i16, 4},
337
338
339 {4, MVT::v8i8, 16},
340 {4, MVT::v4i16, 8},
341 {4, MVT::v2i32, 4},
342
343
344 {4, MVT::v4i32, 16},
345 };
346
347 EVT ETy = TLI->getValueType(DL, SubVecTy);
348 if (const auto *Entry =
350 return Entry->Cost + (NumAccesses * MemCost);
351 }
352
355 UseMaskForCond, UseMaskForGaps);
356}
357
360 const Value *Op0, const Value *Op1) const {
362 Opcode, Val, CostKind, Index, Op0, Op1);
363
364
365 if (Index == -1u)
367
369}
370
372 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
377 if (!VF.isFixed() || !ST->hasSIMD128())
379
382
383 if (Opcode != Instruction::Add)
385
387
388 if (AccumEVT != MVT::i32)
390
391
392
393
394
395
396
398 if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
399 (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
401 }
402
405
407 if (!BinOp)
409
410 if (OpAExtend != OpBExtend)
412
413 if (*BinOp != Instruction::Mul)
415
416 if (InputTypeA != InputTypeB)
418
419
420 if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
422
423
424 if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
426
428}
429
432
433 switch (II->getIntrinsicID()) {
434 default:
435 break;
436 case Intrinsic::vector_reduce_fadd:
438 }
440}
441
445
446
452 return;
453
454
455
456
459
460
463
464
465
467}
468
470 return getST()->hasTailCall();
471}
472
476
477 if (->getType()->isVectorTy() ||
->isShift())
478 return false;
479
480 Value *V = I->getOperand(1);
481
483 return false;
484
487
489
490 Ops.push_back(&I->getOperandUse(1));
491 return true;
492 }
493
494 return false;
495}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static const int MaxVecSize
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
This file defines a TargetTransformInfoImplBase conforming object specific to the WebAssembly target machine.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool isLoweredToCall(const Function *F) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const override
Definition WebAssemblyTargetTransformInfo.cpp:274
bool supportsTailCalls() const override
Definition WebAssemblyTargetTransformInfo.cpp:469
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:224
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition WebAssemblyTargetTransformInfo.cpp:23
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition WebAssemblyTargetTransformInfo.cpp:39
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:53
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Definition WebAssemblyTargetTransformInfo.cpp:473
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
Definition WebAssemblyTargetTransformInfo.cpp:371
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Definition WebAssemblyTargetTransformInfo.cpp:358
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override
Definition WebAssemblyTargetTransformInfo.cpp:430
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition WebAssemblyTargetTransformInfo.cpp:442
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
Definition WebAssemblyTargetTransformInfo.cpp:209
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:82
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition WebAssemblyTargetTransformInfo.cpp:28
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ SIGN_EXTEND
Conversion operators.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).