AArch64TargetTransformInfo.h Source File (original) (raw)

16#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

17#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

27#include <cstdint>

28#include <optional>

30namespace llvm {

46 friend BaseT;

// Tags naming the two-, three- and four-element vector load/store variants.
// NOTE(review): presumably used to classify AArch64 structured load/store
// memory intrinsics; the users are not visible in this listing — confirm.
56 enum MemIntrinsicType {

57 VECTOR_LDST_TWO_ELEMENTS,

58 VECTOR_LDST_THREE_ELEMENTS,

59 VECTOR_LDST_FOUR_ELEMENTS

60 };

65 Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,

67 Type *SrcOverrideTy = nullptr) const;

70 bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,

72 Type *SrcOverrideTy = nullptr) const;

84 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;

86public:

88 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),

89 TLI(ST->getTargetLowering()) {}

92 const Function *Callee) const override;

98 unsigned DefaultCallPenalty) const override;

101

103

104

105

106

114 Instruction *Inst = nullptr) const override;

119

120

121

122

123

124

126

128 return ST->hasSVE();

129 }

130

132 bool Vector = (ClassID == 1);

134 if (ST->hasNEON())

135 return 32;

136 return 0;

137 }

138 return 31;

139 }

140

144

145 std::optional<Instruction *>

147

150 APInt &UndefElts2, APInt &UndefElts3,

152 SimplifyAndSetOp) const override;

153

156

158 return ST->getMinVectorRegisterBitWidth();

159 }

160

162 return ST->getVScaleForTuning();

163 }

164

166

169

170

171

172

173

180

182

184

185

186

188 unsigned Opcode2) const;

189

193

196

199

201 Type *Src) const;

202

206 const Instruction *I = nullptr) const override;

207

210 unsigned Index,

212

214 const Instruction *I = nullptr) const override;

215

218 unsigned Index, const Value *Op0,

219 const Value *Op1) const override;

220

221

222

223

224

227 unsigned Index, Value *Scalar,

228 ArrayRef<std::tuple<Value *, User *, int>>

229 ScalarUserAndIdx) const override;

230

233 unsigned Index) const override;

234

238 unsigned Index) const override;

239

243

247

250

256 const Instruction *CxtI = nullptr) const override;

257

261

267 const Instruction *I = nullptr) const override;

268

272

274 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,

277 const Instruction *I = nullptr) const override;

278

281

284 OptimizationRemarkEmitter *ORE) const override;

285

288

291 bool CanCreate = true) const override;

292

294 MemIntrinsicInfo &Info) const override;

295

297 if (Ty->isPointerTy())

298 return true;

299

300 if (Ty->isBFloatTy() && ST->hasBF16())

301 return true;

302

303 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())

304 return true;

305

306 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||

307 Ty->isIntegerTy(32) || Ty->isIntegerTy(64))

308 return true;

309

310 return false;

311 }

312

314 if (!ST->isSVEorStreamingSVEAvailable())

315 return false;

316

317

319 DataType->getPrimitiveSizeInBits() != 128)

320 return false;

321

323 }

324

330

336

338 return Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isIntegerTy(32) ||

339 Ty->isIntegerTy(64);

340 }

341

343 Align Alignment) const override {

344 if (!ST->isSVEAvailable())

345 return false;

346

348 DataType->getPrimitiveSizeInBits() < 128)

349 return false;

350

352 }

353

355 if (!ST->isSVEAvailable())

356 return false;

357

358

360 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||

361 DataTypeFVTy->getNumElements() < 2))

362 return false;

363

365 }

366

370

374

377

378 if (!ST->hasNEON() || NumElements.isScalable())

379 return false;

381 case 8:

382 case 16:

383 case 32:

384 case 64: {

385

386 unsigned VectorBits = NumElements.getFixedValue() * ElementBits;

387 return VectorBits >= 64;

388 }

389 }

390 return false;

391 }

392

394

395

396

397

398

399

400

402 unsigned NumElements = DataTypeTy->getNumElements();

403 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();

404 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&

406 }

408 }

409

413

415

416 if (ST->isLittleEndian())

419 }

420

422 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

426

428

432 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

433

436 bool &AllowPromotionWithoutCommonHeader) const override;

437

441

443

445 return ST->hasSVE() ? 5 : 0;

446 }

447

450 if (ST->hasSVE())

451 return IVUpdateMayOverflow

454

456 }

457

459

461

463

465 return ST->isSVEorStreamingSVEAvailable();

466 }

467

469

472

474

475

476

481

484 std::optional<FastMathFlags> FMF,

486

489 VectorType *ValTy, std::optional<FastMathFlags> FMF,

491

493 bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,

495

500 const Instruction *CxtI = nullptr) const override;

501

503 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,

506

507

508

509

510

511

513 StackOffset BaseOffset, bool HasBaseReg,

514 int64_t Scale,

515 unsigned AddrSpace) const override;

516

518 return ST->enableSelectOptimize();

519 }

520

522

524 Type *ScalarValTy) const override {

525

527 return 4;

528

530 }

531

532 std::optional<unsigned> getMinPageSize() const override { return 4096; }

533

536

539

540};

541

542}

543

544#endif

This file provides a helper that implements much of the TTI interface in terms of the target-independ...

Analysis containing CSE Info

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

uint64_t IntrinsicInst * II

This pass exposes codegen information to IR-level passes.

InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

bool isLegalNTLoad(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:414

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override

unsigned getMaxInterleaveFactor(ElementCount VF) const override

TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override

Definition AArch64TargetTransformInfo.h:449

InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

bool isVScaleKnownToBeAPowerOfTwo() const override

Definition AArch64TargetTransformInfo.h:165

bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const

Definition AArch64TargetTransformInfo.h:393

InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override

Definition AArch64TargetTransformInfo.h:375

InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override

bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const

InstructionCost getIntImmCost(int64_t Val) const

Calculate the cost of materializing a 64-bit value.

std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const

FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...

bool prefersVectorizedAddressing() const override

unsigned getMinTripCountTailFoldingThreshold() const override

Definition AArch64TargetTransformInfo.h:444

std::optional< unsigned > getVScaleForTuning() const override

Definition AArch64TargetTransformInfo.h:161

InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:371

InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override

bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override

Definition AArch64TargetTransformInfo.h:331

bool supportsScalableVectors() const override

Definition AArch64TargetTransformInfo.h:464

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

bool enableSelectOptimize() const override

Definition AArch64TargetTransformInfo.h:517

InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override

bool isElementTypeLegalForScalableVector(Type *Ty) const override

Definition AArch64TargetTransformInfo.h:296

bool preferPredicatedReductionSelect() const override

Definition AArch64TargetTransformInfo.h:473

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

unsigned getNumberOfRegisters(unsigned ClassID) const override

Definition AArch64TargetTransformInfo.h:131

bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned, TTI::MaskKind) const override

Definition AArch64TargetTransformInfo.h:325

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override

bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override

InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override

Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...

std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override

bool isLegalNTStore(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:410

bool useNeonVector(const Type *Ty) const

std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

std::optional< unsigned > getMinPageSize() const override

Definition AArch64TargetTransformInfo.h:532

bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override

InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const

Definition AArch64TargetTransformInfo.h:313

unsigned getMinVectorRegisterBitWidth() const override

Definition AArch64TargetTransformInfo.h:157

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override

bool isElementTypeLegalForCompressStore(Type *Ty) const

Definition AArch64TargetTransformInfo.h:337

InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override

AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)

Definition AArch64TargetTransformInfo.h:87

unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override

bool areInlineCompatible(const Function *Caller, const Function *Callee) const override

unsigned getMaxNumElements(ElementCount VF) const

Try to return an estimated cost factor that can be used as a multiplier when scalarizing an operation ...

Definition AArch64TargetTransformInfo.h:174

bool shouldTreatInstructionLikeSelect(const Instruction *I) const override

bool enableOrderedReductions() const override

Definition AArch64TargetTransformInfo.h:427

bool isMultiversionedFunction(const Function &F) const override

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override

TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override

bool enableInterleavedAccessVectorization() const override

Definition AArch64TargetTransformInfo.h:125

InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override

bool isLegalMaskedGatherScatter(Type *DataType) const

Definition AArch64TargetTransformInfo.h:354

unsigned getGISelRematGlobalCost() const override

Definition AArch64TargetTransformInfo.h:442

bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override

See if I should be considered for address type promotion.

APInt getFeatureMask(const Function &F) const override

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override

bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override

bool enableScalableVectorization() const override

InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override

bool shouldExpandReduction(const IntrinsicInst *II) const override

Definition AArch64TargetTransformInfo.h:438

Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override

bool isLegalMaskedGather(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:367

bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const

Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.

bool enableMaskedInterleavedAccessVectorization() const override

Definition AArch64TargetTransformInfo.h:127

unsigned getEpilogueVectorizationMinVF() const override

InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const

InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const

InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override

Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...

bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const override

Definition AArch64TargetTransformInfo.h:342

bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override

Definition AArch64TargetTransformInfo.h:523

Class for arbitrary precision integers.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const override

BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

Convenience struct for specifying and reasoning about fast-math flags.

Container class for subtarget features.

The core instruction combiner logic.

A wrapper class for inspecting calls to intrinsic functions.

Represents a single loop in the control flow graph.

Information for memory intrinsic cost model.

The RecurrenceDescriptor is used to identify recurrence variables in a loop.

This class represents an analyzed expression in the program.

The main scalar evolution driver.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

virtual const DataLayout & getDataLayout() const

virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const

virtual bool isLegalNTStore(Type *DataType, Align Alignment) const

virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

MaskKind

Some targets only support masked load/store with a constant mask.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

PopcntSupportKind

Flags indicating the kind of support for population count.

PartialReductionExtendKind

ShuffleKind

The various kinds of shuffle patterns for vector queries.

CastContextHint

Represents a hint about the context in which a cast is used.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

LLVM Value Representation.

Base class of all SIMD vector types.

constexpr ScalarTy getFixedValue() const

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

ArrayRef(const T &OneElt) -> ArrayRef< T >

@ DataAndControlFlowWithoutRuntimeCheck

Use predicate to control both data and control flow, but modify the trip count so that a runtime over...

@ DataAndControlFlow

Use predicate to control both data and control flow.

@ DataWithoutLaneMask

Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...

This struct is a compact representation of a valid (non-zero power of two) alignment.

Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.

Parameters that control the generic loop unrolling transformation.