HexagonTargetTransformInfo.cpp Source File (original) (raw)

27using namespace llvm;

29#define DEBUG_TYPE "hexagontti"

36 cl::desc("Allow auto-generation of HVX scatter-gather"));

40 cl::desc("Enable auto-vectorization of floatint point types on v68."));

44 cl::desc("Control lookup table emission on Hexagon target"));

54bool HexagonTTIImpl::useHVX() const {

56}

58bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {

60 if (!VecTy)

61 return false;

62 if (!ST.isTypeForHVX(VecTy))

63 return false;

64 if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())

65 return true;

67}

69unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {

71 return VTy->getNumElements();

73 "Expecting scalar type");

74 return 1;

75}

95 if (L && L->isInnermost() && canPeel(L) &&

100 }

101}

102

108

109

110

112 bool Vector = ClassID == 1;

114 return useHVX() ? 32 : 0;

115 return 32;

116}

117

119 return useHVX() ? 2 : 1;

120}

121

124 switch (K) {

131 }

132

134}

135

137 return useHVX() ? ST.getVectorLength()*8 : 32;

138}

139

141 bool IsScalable) const {

142 assert(!IsScalable && "Scalable VFs are not supported for Hexagon");

144}

145

151

155 if (ICA.getID() == Intrinsic::bswap) {

156 std::pair<InstructionCost, MVT> LT =

158 return LT.first + 2;

159 }

161}

162

165 const SCEV *S,

167 return 0;

168}

169

176 assert(Opcode == Instruction::Load || Opcode == Instruction::Store);

177

179 return 1;

180

181 if (Opcode == Instruction::Store)

184

185 if (Src->isVectorTy()) {

188 if (isHVXVectorType(VecTy)) {

189 unsigned RegWidth =

192 assert(RegWidth && "Non-zero vector register width expected");

193

194 if (VecWidth % RegWidth == 0)

195 return VecWidth / RegWidth;

196

197 const Align RegAlign(RegWidth / 8);

198 if (Alignment > RegAlign)

199 Alignment = RegAlign;

200 unsigned AlignWidth = 8 * Alignment.value();

201 unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;

202 return 3 * NumLoads;

203 }

204

205

206

207 unsigned Cost =

209

210

211 const Align BoundAlignment = std::min(Alignment, Align(8));

212 unsigned AlignWidth = 8 * BoundAlignment.value();

213 unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;

214 if (Alignment == Align(4) || Alignment == Align(8))

215 return Cost * NumLoads;

216

218 unsigned LogA = Log2(BoundAlignment);

219 return (3 - LogA) * Cost * NumLoads;

220 }

221

223 OpInfo, I);

224}

225

234

238 bool UseMaskForCond, bool UseMaskForGaps) const {

239 if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)

243 UseMaskForCond, UseMaskForGaps);

245}

246

252 if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())

255 if (Opcode == Instruction::FCmp)

256 return LT.first + FloatFactor * getTypeNumElements(ValTy);

257 }

259 Op1Info, Op2Info, I);

260}

261

266

269 Op2Info, Args, CxtI);

270

271 if (Ty->isVectorTy()) {

272 if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())

275 if (LT.second.isFloatingPoint())

276 return LT.first + FloatFactor * getTypeNumElements(Ty);

277 }

279 Args, CxtI);

280}

281

287 auto isNonHVXFP = [this] (Type *Ty) {

288 return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();

289 };

290 if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))

292

293 if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {

294 unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;

295 unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

296

300 std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);

301

303 return Cost == 0 ? 0 : 1;

305 }

306 return 1;

307}

308

311 unsigned Index,

312 const Value *Op0,

313 const Value *Op1) const {

315 : Val;

316 if (Opcode == Instruction::InsertElement) {

317

318 unsigned Cost = (Index != 0) ? 2 : 0;

319 if (ElemTy->isIntegerTy(32))

321

323 Index, Op0, Op1);

324 }

325

326 if (Opcode == Instruction::ExtractElement)

327 return 2;

328

329 return 1;

330}

331

333 unsigned ,

335

336

338}

339

341 unsigned ,

343

344

346}

347

349

350 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||

352 return false;

353

354 switch (Ty->getScalarSizeInBits()) {

355 case 8:

356 return (getTypeNumElements(Ty) == 128);

357 case 16:

358 if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)

359 return (Alignment >= 2);

360 break;

361 case 32:

362 if (getTypeNumElements(Ty) == 32)

363 return (Alignment >= 4);

364 break;

365 default:

366 break;

367 }

368 return false;

369}

370

372 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||

374 return false;

375

376 switch (Ty->getScalarSizeInBits()) {

377 case 8:

378 return (getTypeNumElements(Ty) == 128);

379 case 16:

380 if (getTypeNumElements(Ty) == 64)

381 return (Alignment >= 2);

382 break;

383 case 32:

384 if (getTypeNumElements(Ty) == 32)

385 return (Alignment >= 4);

386 break;

387 default:

388 break;

389 }

390 return false;

391}

392

394 Align Alignment) const {

396}

397

399 Align Alignment) const {

401}

402

403

404

406 return ST.getL1PrefetchDistance();

407}

408

410 return ST.getL1CacheLineSize();

411}

412

417 auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {

418 if (!CI->isIntegerCast())

419 return false;

420

421

423 unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());

424 unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());

425 if (DBW != 32 || SBW >= DBW)

426 return false;

427

429

430

431

433 };

434

436 if (isCastFoldedIntoLoad(CI))

439}

440

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

static const unsigned FloatFactor

Definition HexagonTargetTransformInfo.cpp:52

static cl::opt< bool > EnableV68FloatAutoHVX("force-hvx-float", cl::Hidden, cl::desc("Enable auto-vectorization of floatint point types on v68."))

cl::opt< bool > HexagonAllowScatterGatherHVX("hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden, cl::desc("Allow auto-generation of HVX scatter-gather"))

static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))

static cl::opt< bool > HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true), cl::Hidden, cl::desc("Enable masked loads/stores for HVX"))

static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))

This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine.

This pass exposes codegen information to IR-level passes.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

This is the base class for all instructions that perform data casts.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

A parsed version of the target data layout string, and methods for querying it.

static constexpr ElementCount getFixed(ScalarTy MinVal)

Definition HexagonTargetTransformInfo.cpp:170

bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) const override

Definition HexagonTargetTransformInfo.cpp:398

bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const override

Definition HexagonTargetTransformInfo.cpp:332

ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override

Definition HexagonTargetTransformInfo.cpp:140

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

Definition HexagonTargetTransformInfo.cpp:282

bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) const override

Definition HexagonTargetTransformInfo.cpp:393

Definition HexagonTargetTransformInfo.cpp:235

unsigned getNumberOfRegisters(unsigned ClassID) const override

— Vector TTI begin —

Definition HexagonTargetTransformInfo.cpp:111

InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *S, TTI::TargetCostKind CostKind) const override

Definition HexagonTargetTransformInfo.cpp:164

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition HexagonTargetTransformInfo.cpp:262

TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override

Definition HexagonTargetTransformInfo.cpp:78

unsigned getMinVectorRegisterBitWidth() const override

Definition HexagonTargetTransformInfo.cpp:136

bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const override

Definition HexagonTargetTransformInfo.cpp:340

bool isLegalMaskedGather(Type *Ty, Align Alignment) const override

Definition HexagonTargetTransformInfo.cpp:348

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override

Definition HexagonTargetTransformInfo.cpp:123

InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override

Compute a cost of the given call instruction.

Definition HexagonTargetTransformInfo.cpp:147

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

Definition HexagonTargetTransformInfo.cpp:91

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

Definition HexagonTargetTransformInfo.cpp:309

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

Get intrinsic cost based on arguments.

Definition HexagonTargetTransformInfo.cpp:153

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition HexagonTargetTransformInfo.cpp:227

TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override

Bias LSR towards creating post-increment opportunities.

Definition HexagonTargetTransformInfo.cpp:104

bool shouldBuildLookupTables() const override

Definition HexagonTargetTransformInfo.cpp:441

unsigned getMaxInterleaveFactor(ElementCount VF) const override

Definition HexagonTargetTransformInfo.cpp:118

bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override

Definition HexagonTargetTransformInfo.cpp:371

Definition HexagonTargetTransformInfo.cpp:247

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override

Definition HexagonTargetTransformInfo.cpp:414

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

Definition HexagonTargetTransformInfo.cpp:85

unsigned getCacheLineSize() const override

Definition HexagonTargetTransformInfo.cpp:409

unsigned getPrefetchDistance() const override

— Vector TTI end —

Definition HexagonTargetTransformInfo.cpp:405

static InstructionCost getMax()

Type * getReturnType() const

Intrinsic::ID getID() const

An instruction for reading from memory.

Represents a single loop in the control flow graph.

This class represents an analyzed expression in the program.

The main scalar evolution driver.

LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)

Returns the exact trip count of the loop if we can compute it, and the result is a small constant.

virtual const DataLayout & getDataLayout() const

virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const

MaskKind

Some targets only support masked load/store with a constant mask.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

PopcntSupportKind

Flags indicating the kind of support for population count.

@ TCC_Free

Expected to fold away in lowering.

AddressingModeKind

Which addressing mode Loop Strength Reduction will try to generate.

@ AMK_PostIndexed

Prefer post-indexed addressing mode.

ShuffleKind

The various kinds of shuffle patterns for vector queries.

CastContextHint

Represents a hint about the context in which a cast is used.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

static constexpr TypeSize getScalable(ScalarTy MinimumSize)

The instances of the Type class are immutable: once they are created, they are never changed.

bool isVectorTy() const

True if this is an instance of VectorType.

LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

bool isFPOrFPVectorTy() const

Return true if this is a FP type or a vector of FP.

LLVM Value Representation.

bool hasOneUse() const

Return true if there is exactly one use of this value.

Base class of all SIMD vector types.

Type * getElementType() const

constexpr ScalarTy getFixedValue() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

bool canPeel(const Loop *L)

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

unsigned Log2(Align A)

Returns the log2 of the alignment.

This struct is a compact representation of a valid (non-zero power of two) alignment.

constexpr uint64_t value() const

This is a hole in the type system and should not be abused.

unsigned PeelCount

A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...

Parameters that control the generic loop unrolling transformation.

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...