AArch64TargetTransformInfo.h Source File (original) (raw)

16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

27#include

28#include

30namespace llvm {

46 friend BaseT;

56 enum MemIntrinsicType {

57 VECTOR_LDST_TWO_ELEMENTS,

58 VECTOR_LDST_THREE_ELEMENTS,

59 VECTOR_LDST_FOUR_ELEMENTS

60 };

65 Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,

67 Type *SrcOverrideTy = nullptr) const;

70 bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,

72 Type *SrcOverrideTy = nullptr) const;

84 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;

86public:

88 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),

89 TLI(ST->getTargetLowering()) {}

92 const Function *Callee) const override;

98 unsigned DefaultCallPenalty) const override;

102

104

105

106

107

115 Instruction *Inst = nullptr) const override;

120

121

122

123

124

125

127

129 return ST->hasSVE();

130 }

131

133 bool Vector = (ClassID == 1);

135 if (ST->hasNEON())

136 return 32;

137 return 0;

138 }

139 return 31;

140 }

141

145

146 std::optional<Instruction *>

148

151 APInt &UndefElts2, APInt &UndefElts3,

153 SimplifyAndSetOp) const override;

154

157

159 return ST->getMinVectorRegisterBitWidth();

160 }

161

163 return ST->getVScaleForTuning();

164 }

165

167

170

171

172

173

174

181

183

185

186

187

189 unsigned Opcode2) const;

190

194

197

200

202 Type *Src) const;

203

207 const Instruction *I = nullptr) const override;

208

211 unsigned Index,

213

215 const Instruction *I = nullptr) const override;

216

219 unsigned Index, const Value *Op0,

220 const Value *Op1) const override;

221

222

223

224

225

228 unsigned Index, Value *Scalar,

229 ArrayRef<std::tuple<Value *, User *, int>>

230 ScalarUserAndIdx) const override;

231

234 unsigned Index) const override;

235

239 unsigned Index) const override;

240

244

248

251

257 const Instruction *CxtI = nullptr) const override;

258

262

268 const Instruction *I = nullptr) const override;

269

273

275 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,

278 const Instruction *I = nullptr) const override;

279

282

285 OptimizationRemarkEmitter *ORE) const override;

286

289

292 bool CanCreate = true) const override;

293

295 MemIntrinsicInfo &Info) const override;

296

298 if (Ty->isPointerTy())

299 return true;

300

301 if (Ty->isBFloatTy() && ST->hasBF16())

302 return true;

303

304 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())

305 return true;

306

307 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||

308 Ty->isIntegerTy(32) || Ty->isIntegerTy(64))

309 return true;

310

311 return false;

312 }

313

315 if (!ST->isSVEorStreamingSVEAvailable())

316 return false;

317

318

320 DataType->getPrimitiveSizeInBits() != 128)

321 return false;

322

324 }

325

331

337

339 return Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isIntegerTy(32) ||

340 Ty->isIntegerTy(64);

341 }

342

344 Align Alignment) const override {

345 if (!ST->isSVEAvailable())

346 return false;

347

349 DataType->getPrimitiveSizeInBits() < 128)

350 return false;

351

353 }

354

356 if (!ST->isSVEAvailable())

357 return false;

358

359

361 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||

362 DataTypeFVTy->getNumElements() < 2))

363 return false;

364

366 }

367

371

375

378

379 if (!ST->hasNEON() || NumElements.isScalable())

380 return false;

382 case 8:

383 case 16:

384 case 32:

385 case 64: {

386

387 unsigned VectorBits = NumElements.getFixedValue() * ElementBits;

388 return VectorBits >= 64;

389 }

390 }

391 return false;

392 }

393

395

396

397

398

399

400

401

403 unsigned NumElements = DataTypeTy->getNumElements();

404 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();

405 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&

407 }

409 }

410

414

416

417 if (ST->isLittleEndian())

420 }

421

423 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

427

429

433 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

434

437 bool &AllowPromotionWithoutCommonHeader) const override;

438

442

444

446 return ST->hasSVE() ? 5 : 0;

447 }

448

451 if (ST->hasSVE())

452 return IVUpdateMayOverflow

455

457 }

458

460

462

464

466 return ST->isSVEorStreamingSVEAvailable();

467 }

468

470

473

475

476

477

482

485 std::optional<FastMathFlags> FMF,

487

490 VectorType *ValTy, std::optional<FastMathFlags> FMF,

492

494 bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,

496

501 const Instruction *CxtI = nullptr) const override;

502

504 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,

507

508

509

510

511

512

514 StackOffset BaseOffset, bool HasBaseReg,

515 int64_t Scale,

516 unsigned AddrSpace) const override;

517

519 return ST->enableSelectOptimize();

520 }

521

523

525 Type *ScalarValTy) const override {

526

528 return 4;

529

531 }

532

533 std::optional<unsigned> getMinPageSize() const override { return 4096; }

534

537

540

541};

542

543}

544

545#endif

This file provides a helper that implements much of the TTI interface in terms of the target-independ...

Analysis containing CSE Info

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

uint64_t IntrinsicInst * II

This pass exposes codegen information to IR-level passes.

InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

bool isLegalNTLoad(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:415

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override

unsigned getMaxInterleaveFactor(ElementCount VF) const override

TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override

Definition AArch64TargetTransformInfo.h:450

InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

bool isVScaleKnownToBeAPowerOfTwo() const override

Definition AArch64TargetTransformInfo.h:166

bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const

Definition AArch64TargetTransformInfo.h:394

InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override

Definition AArch64TargetTransformInfo.h:376

InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override

bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const

InstructionCost getIntImmCost(int64_t Val) const

Calculate the cost of materializing a 64-bit value.

std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const

FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...

bool prefersVectorizedAddressing() const override

unsigned getMinTripCountTailFoldingThreshold() const override

Definition AArch64TargetTransformInfo.h:445

std::optional< unsigned > getVScaleForTuning() const override

Definition AArch64TargetTransformInfo.h:162

InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:372

InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override

bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override

Definition AArch64TargetTransformInfo.h:332

bool supportsScalableVectors() const override

Definition AArch64TargetTransformInfo.h:465

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

bool enableSelectOptimize() const override

Definition AArch64TargetTransformInfo.h:518

InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override

bool isElementTypeLegalForScalableVector(Type *Ty) const override

Definition AArch64TargetTransformInfo.h:297

bool preferPredicatedReductionSelect() const override

Definition AArch64TargetTransformInfo.h:474

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

unsigned getNumberOfRegisters(unsigned ClassID) const override

Definition AArch64TargetTransformInfo.h:132

bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override

Definition AArch64TargetTransformInfo.h:326

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

APInt getPriorityMask(const Function &F) const override

bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override

bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override

InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override

Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...

std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override

bool isLegalNTStore(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:411

bool useNeonVector(const Type *Ty) const

std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

std::optional< unsigned > getMinPageSize() const override

Definition AArch64TargetTransformInfo.h:533

bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override

InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const

Definition AArch64TargetTransformInfo.h:314

unsigned getMinVectorRegisterBitWidth() const override

Definition AArch64TargetTransformInfo.h:158

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override

bool isElementTypeLegalForCompressStore(Type *Ty) const

Definition AArch64TargetTransformInfo.h:338

InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override

AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)

Definition AArch64TargetTransformInfo.h:87

unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override

bool areInlineCompatible(const Function *Caller, const Function *Callee) const override

unsigned getMaxNumElements(ElementCount VF) const

Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...

Definition AArch64TargetTransformInfo.h:175

bool shouldTreatInstructionLikeSelect(const Instruction *I) const override

bool enableOrderedReductions() const override

Definition AArch64TargetTransformInfo.h:428

bool isMultiversionedFunction(const Function &F) const override

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override

TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override

bool enableInterleavedAccessVectorization() const override

Definition AArch64TargetTransformInfo.h:126

InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedGatherScatter(Type *DataType) const

Definition AArch64TargetTransformInfo.h:355

unsigned getGISelRematGlobalCost() const override

Definition AArch64TargetTransformInfo.h:443

bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override

See if I should be considered for address type promotion.

APInt getFeatureMask(const Function &F) const override

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override

bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override

bool enableScalableVectorization() const override

InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override

bool shouldExpandReduction(const IntrinsicInst *II) const override

Definition AArch64TargetTransformInfo.h:439

Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override

bool isLegalMaskedGather(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:368

bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const

Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.

bool enableMaskedInterleavedAccessVectorization() const override

Definition AArch64TargetTransformInfo.h:128

unsigned getEpilogueVectorizationMinVF() const override

InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const

InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const

InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override

Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...

bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:343

bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override

Definition AArch64TargetTransformInfo.h:524

Class for arbitrary precision integers.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override

BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

Convenience struct for specifying and reasoning about fast-math flags.

Container class for subtarget features.

The core instruction combiner logic.

A wrapper class for inspecting calls to intrinsic functions.

Represents a single loop in the control flow graph.

Information for memory intrinsic cost model.

The RecurrenceDescriptor is used to identify recurrence variables in a loop.

This class represents an analyzed expression in the program.

The main scalar evolution driver.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

virtual const DataLayout & getDataLayout() const

virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const

virtual bool isLegalNTStore(Type *DataType, Align Alignment) const

virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

MaskKind

Some targets only support masked load/store with a constant mask.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

PopcntSupportKind

Flags indicating the kind of support for population count.

PartialReductionExtendKind

ShuffleKind

The various kinds of shuffle patterns for vector queries.

CastContextHint

Represents a hint about the context in which a cast is used.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

LLVM Value Representation.

Base class of all SIMD vector types.

constexpr ScalarTy getFixedValue() const

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

ArrayRef(const T &OneElt) -> ArrayRef< T >

@ DataAndControlFlowWithoutRuntimeCheck

Use predicate to control both data and control flow, but modify the trip count so that a runtime over...

@ DataAndControlFlow

Use predicate to control both data and control flow.

@ DataWithoutLaneMask

Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...

This struct is a compact representation of a valid (non-zero power of two) alignment.

Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.

Parameters that control the generic loop unrolling transformation.