LLVM: lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

23#include "llvm/IR/IntrinsicsAMDGPU.h"

28

29#define DEBUG_TYPE "amdgpu-late-codegenprepare"

30

31using namespace llvm;

32

33

34

35

36

38 WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",

39 cl::desc("Widen sub-dword constant address space loads in "

40 "AMDGPULateCodeGenPrepare"),

42

43namespace {

44

45class AMDGPULateCodeGenPrepare

46 : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {

50

53

55

56public:

59 : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}

60 bool run();

61 bool visitInstruction(Instruction &) { return false; }

62

63

64 bool isDWORDAligned(const Value *V) const {

67 }

68

69 bool canWidenScalarExtLoad(LoadInst &LI) const;

70 bool visitLoadInst(LoadInst &LI);

71};

72

74

75class LiveRegOptimizer {

76private:

78 const DataLayout &DL;

79 const GCNSubtarget &ST;

80

81

82 Type *const ConvertToScalar;

83

85

86 DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

87

88public:

89

90

91 Type *calculateConvertType(Type *OriginalType);

92

93

95

96

97

98 Value *convertFromOptType(Type *ConvertType, Instruction *V,

100 BasicBlock *InsertBlock);

101

102

103

104 bool optimizeLiveType(Instruction *I,

105 SmallVectorImpl &DeadInsts);

106

107

108

109 bool shouldReplace(Type *ITy) {

111 if (!VTy)

112 return false;

113

114 const auto *TLI = ST.getTargetLowering();

115

117

118

121 return false;

122

123

127 }

128

130

131 bool isCoercionProfitable(Instruction *II) {

132 SmallPtrSet<Instruction *, 4> CVisited;

133 SmallVector<Instruction *, 4> UserList;

134

135

136

137 for (User *V : II->users())

140

143 return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;

144 return isa<PHINode, ShuffleVectorInst, InsertElementInst,

145 ExtractElementInst, CastInst>(II);

146 };

147

148 while (!UserList.empty()) {

150 if (!CVisited.insert(CII).second)

151 continue;

152

153

154

155 if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&

157 continue;

158

159 if (isOpLegal(CII))

160 return true;

161

162 if (IsLookThru(CII))

163 for (User *V : CII->users())

166 }

167 return false;

168 }

169

170 LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)

171 : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),

172 ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}

173};

174

175}

176

177bool AMDGPULateCodeGenPrepare::run() {

178

179

180

181

182

183

184

185 LiveRegOptimizer LRO(*F.getParent(), ST);

186

188

189 bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

190

194 Changed |= LRO.optimizeLiveType(&I, DeadInsts);

195 }

196

199}

200

201Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {

204

206

207 TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);

208 TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);

209 unsigned ConvertEltCount =

210 (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

211

212 if (OriginalSize <= ConvertScalarSize)

214

215 return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),

216 ConvertEltCount, false);

217}

218

219Value *LiveRegOptimizer::convertToOptType(Instruction *V,

222 Type *NewTy = calculateConvertType(V->getType());

223

224 TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);

225 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

226

227 IRBuilder<> Builder(V->getParent(), InsertPt);

228

229

230 if (OriginalSize == NewSize)

231 return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

232

233

234 assert(NewSize > OriginalSize);

236

237 SmallVector<int, 8> ShuffleMask;

239 for (unsigned I = 0; I < OriginalElementCount; I++)

241

242 for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)

243 ShuffleMask.push_back(OriginalElementCount);

244

245 Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);

246 return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");

247}

248

249Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,

251 BasicBlock *InsertBB) {

253

254 TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());

255 TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

256

258

259 if (OriginalSize == NewSize)

260 return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

261

262

263

264 assert(OriginalSize > NewSize);

265

266 if (V->getType()->isVectorTy()) {

270 }

271

272

273

274 VectorType *ExpandedVT = VectorType::get(

279

281 SmallVector<int, 8> ShuffleMask(NarrowElementCount);

282 std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

283

284 return Builder.CreateShuffleVector(Converted, ShuffleMask);

285}

286

287bool LiveRegOptimizer::optimizeLiveType(

288 Instruction *I, SmallVectorImpl &DeadInsts) {

289 SmallVector<Instruction *, 4> Worklist;

290 SmallPtrSet<PHINode *, 4> PhiNodes;

291 SmallPtrSet<Instruction *, 4> Defs;

292 SmallPtrSet<Instruction *, 4> Uses;

293 SmallPtrSet<Instruction *, 4> Visited;

294

296 while (!Worklist.empty()) {

298

299 if (!Visited.insert(II).second)

300 continue;

301

302 if (!shouldReplace(II->getType()))

303 continue;

304

305 if (!isCoercionProfitable(II))

306 continue;

307

309 PhiNodes.insert(Phi);

310

311 for (Value *V : Phi->incoming_values()) {

312

314 if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))

316 continue;

317 }

318

320

322 return false;

323

324

325 if (IncInst)

326 Defs.insert(IncInst);

327 }

328 }

329

330

331 for (User *V : II->users()) {

332

334 if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))

336 continue;

337 }

338

340

342 Uses.insert(UseInst);

345 }

346 }

347 }

348

349

350 for (Instruction *D : Defs) {

353 Value *ConvertVal = convertToOptType(D, InsertPt);

355 ValMap[D] = ConvertVal;

356 }

357 }

358

359

360 for (PHINode *Phi : PhiNodes) {

362 Phi->getNumIncomingValues(),

363 Phi->getName() + ".tc", Phi->getIterator());

364 }

365

366

367 for (PHINode *Phi : PhiNodes) {

369 bool MissingIncVal = false;

370 for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {

371 Value *IncVal = Phi->getIncomingValue(I);

373 Type *NewType = calculateConvertType(Phi->getType());

374 NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),

375 Phi->getIncomingBlock(I));

376 } else if (Value *Val = ValMap.lookup(IncVal))

378 else

379 MissingIncVal = true;

380 }

381 if (MissingIncVal) {

382 Value *DeadVal = ValMap[Phi];

383

384

386 SmallPtrSet<Value *, 4> VisitedPhis;

388 while (!PHIWorklist.empty()) {

390 VisitedPhis.insert(NextDeadValue);

391 auto OriginalPhi =

392 llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {

393 return ValMap[CandPhi] == NextDeadValue;

394 });

395

396

397 if (OriginalPhi != PhiNodes.end())

398 ValMap.erase(*OriginalPhi);

399

401

402 for (User *U : NextDeadValue->users()) {

405 }

406 }

407 } else {

409 }

410 }

411

412 for (Instruction *U : Uses) {

413

416 Value *NewVal = nullptr;

417 if (BBUseValMap.contains(U->getParent()) &&

418 BBUseValMap[U->getParent()].contains(Val))

419 NewVal = BBUseValMap[U->getParent()][Val];

420 else {

422

423

424

427 NewVal = Op;

428 } else {

429 NewVal =

431 InsertPt, U->getParent());

432 BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;

433 }

434 }

436 U->setOperand(OpIdx, NewVal);

437 }

438 }

439 }

440

441 return true;

442}

443

444bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {

446

449 return false;

450

452 return false;

454

456 return false;

457 unsigned TySize = DL.getTypeStoreSize(Ty);

458

459 if (TySize >= 4)

460 return false;

461

462 if (LI.getAlign() < DL.getABITypeAlign(Ty))

463 return false;

464

466}

467

468bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {

470 return false;

471

472

473

475 return false;

476

477 if (!canWidenScalarExtLoad(LI))

478 return false;

479

483

484

485 if (!isDWORDAligned(Base))

486 return false;

487

488 int64_t Adjust = Offset & 0x3;

489 if (Adjust == 0) {

490

491

493 return true;

494 }

495

497 IRB.SetCurrentDebugLocation(LI.getDebugLoc());

498

499 unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());

500 auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

501

502 auto *NewPtr = IRB.CreateConstGEP1_64(

503 IRB.getInt8Ty(),

506

507 LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));

509 NewLd->setMetadata(LLVMContext::MD_range, nullptr);

510

511 unsigned ShAmt = Adjust * 8;

512 Value *NewVal = IRB.CreateBitCast(

513 IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),

514 DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy

519

520 return true;

521}

522

523PreservedAnalyses

528

529 bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

530

535 return PA;

536}

537

539public:

541

543

545 return "AMDGPU IR late optimizations";

546 }

547

555

557};

558

561 return false;

562

566

571

572 return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

573}

574

576 "AMDGPU IR late optimizations", false, false)

582

584

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static cl::opt< bool > WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads", cl::desc("Widen sub-dword constant address space loads in " "AMDGPULateCodeGenPrepare"), cl::ReallyHidden, cl::init(true))

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static bool runOnFunction(Function &F, bool PostInlining)

Machine Check Debug Module

MachineInstr unsigned OpIdx

uint64_t IntrinsicInst * II

if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Remove Loads Into Fake Uses

void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)

Target-Independent Code Generator Pass Configuration Options pass.

LLVM IR instance of the generic uniformity analysis.

static char ID

Definition AMDGPULateCodeGenPrepare.cpp:540

bool runOnFunction(Function &F) override

runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

Definition AMDGPULateCodeGenPrepare.cpp:559

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.

Definition AMDGPULateCodeGenPrepare.cpp:548

AMDGPULateCodeGenPrepareLegacy()

Definition AMDGPULateCodeGenPrepare.cpp:542

StringRef getPassName() const override

getPassName - Return a nice clean name for a pass.

Definition AMDGPULateCodeGenPrepare.cpp:544

PreservedAnalyses run(Function &, FunctionAnalysisManager &)

Definition AMDGPULateCodeGenPrepare.cpp:524

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

A function analysis which provides an AssumptionCache.

An immutable pass that tracks lazily created AssumptionCache objects.

A cache of @llvm.assume calls within a function.

InstListType::iterator iterator

Instruction iterators...

Represents analyses that only rely on functions' control flow.

A parsed version of the target data layout string in and methods for querying it.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.

bool erase(const KeyT &Val)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

FunctionPass class - This class is used to implement most global optimizations.

bool skipFunction(const Function &F) const

Optional passes call this function to check whether the pass should be skipped.

bool isUniform(ConstValueRefT V) const

Whether V is uniform/non-divergent.

Base class for instruction visitors.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())

Copy metadata from SrcInst to this instruction.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

unsigned getPointerAddressSpace() const

Returns the address space of the pointer operand.

void setAlignment(Align Align)

Value * getPointerOperand()

Align getAlign() const

Return the alignment of the access that is being performed.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.

AnalysisType & getAnalysis() const

getAnalysis() - This function is used by subclasses to get to the analysis information that they claim to use.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

std::pair< LegalizeTypeAction, EVT > LegalizeKind

LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.

Primary interface to the complete machine description for the target machine.

const STC & getSubtarget(const Function &F) const

This method returns a pointer to the specified type of TargetSubtargetInfo.

Target-Independent Code Generator Pass Configuration Options.

TMC & getTM() const

Get the right type of TargetMachine for this target.

bool isAggregateType() const

Return true if the type is an aggregate type.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

Analysis pass which computes UniformityInfo.

Legacy analysis pass which computes a CycleInfo.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

ElementCount getElementCount() const

Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.

Type * getElementType() const

constexpr ScalarTy getFixedValue() const

const ParentTy * getParent() const

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

NodeAddr< PhiNode * > Phi

Context & getContext() const

friend class Instruction

Iterator for Instructions in a `BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

GenericUniformityInfo< SSAContext > UniformityInfo

FunctionAddr VTableAddr Value

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)

Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iterators.

auto reverse(ContainerTy &&C)

LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

DWARFExpression::Operation Op

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

FunctionPass * createAMDGPULateCodeGenPrepareLegacyPass()

Definition AMDGPULateCodeGenPrepare.cpp:585

DenseMap< const Value *, Value * > ValueToValueMap

static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

unsigned countMinTrailingZeros() const

Returns the minimum number of trailing zero bits.