LLVM: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Source File

//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Eliminates allocas by either converting them into vectors or by migrating
// them to local address space (LDS).
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

namespace {

static cl::opt<bool>
    DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
                                 cl::desc("Disable promote alloca to vector"),
                                 cl::init(false));

static cl::opt<bool>
    DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
                              cl::desc("Disable promote alloca to LDS"),
                              cl::init(false));

static cl::opt<unsigned> PromoteAllocaToVectorLimit(
    "amdgpu-promote-alloca-to-vector-limit",
    cl::desc("Maximum byte size to consider promote alloca to vector"),
    cl::init(0));

static cl::opt<unsigned> PromoteAllocaToVectorMaxRegs(
    "amdgpu-promote-alloca-to-vector-max-regs",
    cl::desc(
        "Maximum vector size (in 32b registers) to use when promoting alloca"),
    cl::init(32));

// Use up to 1/4 of available register budget for vectorization.
static cl::opt<unsigned> PromoteAllocaToVectorVGPRRatio(
    "amdgpu-promote-alloca-to-vector-vgpr-ratio",
    cl::desc("Ratio of VGPRs to budget for promoting alloca to vectors"),
    cl::init(4));

static cl::opt<unsigned>
    LoopUserWeight("promote-alloca-vector-loop-user-weight",
                   cl::desc("The bonus weight of users of allocas within loop "
                            "when sorting profitable allocas"),
                   cl::init(4));
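// The cl::opt flags above can be passed to any tool that parses LLVM options.
// A sketch of a typical invocation (assumed spelling, not taken from a test):
//   opt -passes=amdgpu-promote-alloca \
//       -amdgpu-promote-alloca-to-vector-limit=128 -S in.ll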

// Shared implementation which can do both promotion to vector and to LDS.
class AMDGPUPromoteAllocaImpl {
private:
  const TargetMachine &TM;
  LoopInfo &LI;
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;

  // FIXME: This should be per-kernel.
  uint32_t LocalMemLimit = 0;
  uint32_t CurrentLocalMemUsage = 0;
  unsigned MaxVGPRs;
  unsigned VGPRBudgetRatio;
  unsigned MaxVectorRegs;

  bool IsAMDGCN = false;
  bool IsAMDHSA = false;

  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
                               std::vector<Value *> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
  /// Returns true if both operands are derived from the same alloca, i.e. it
  /// is safe to rewrite both of them to the new pointer type.
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst, int OpIdx0,
                                       int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

  FixedVectorType *getVectorTypeForAlloca(Type *AllocaTy) const;
  bool tryPromoteAllocaToVector(AllocaInst &I);
  bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);

  void sortAllocasToPromote(SmallVectorImpl<AllocaInst *> &Allocas);
  void setFunctionLimits(const Function &F);

public:
  AMDGPUPromoteAllocaImpl(TargetMachine &TM, LoopInfo &LI) : TM(TM), LI(LI) {
    const Triple &TT = TM.getTargetTriple();
    IsAMDGCN = TT.isAMDGCN();
    IsAMDHSA = TT.getOS() == Triple::AMDHSA;
  }

  bool run(Function &F, bool PromoteToLDS);
};

// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
public:
  static char ID;

  AMDGPUPromoteAlloca() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
      return AMDGPUPromoteAllocaImpl(
                 TPC->getTM<TargetMachine>(),
                 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
          .run(F, /*PromoteToLDS=*/true);
    return false;
  }

  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<LoopInfoWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }
};

static unsigned getMaxVGPRs(unsigned LDSBytes, const TargetMachine &TM,
                            const Function &F) {
  if (!TM.getTargetTriple().isAMDGCN())
    return 128;

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  unsigned DynamicVGPRBlockSize = AMDGPU::getDynamicVGPRBlockSize(F);
  // Temporarily check both the attribute and the subtarget feature, until the
  // latter is fully removed.
  if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
    DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(
      ST.getWavesPerEU(ST.getFlatWorkGroupSizes(F), LDSBytes, F).first,
      DynamicVGPRBlockSize);

  // A non-entry function has only 32 caller-preserved registers.
  // Do not promote alloca which will force spilling unless we know the
  // function will be inlined.
  if (!F.hasFnAttribute(Attribute::AlwaysInline) &&
      !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    MaxVGPRs = std::min(MaxVGPRs, 32u);
  return MaxVGPRs;
}

} // end anonymous namespace

char AMDGPUPromoteAlloca::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUPromoteAlloca, DEBUG_TYPE,
                      "AMDGPU promote alloca to vector or LDS", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteAlloca, DEBUG_TYPE,
                    "AMDGPU promote alloca to vector or LDS", false, false)

char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;

PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS=*/true);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}

PreservedAnalyses
AMDGPUPromoteAllocaToVectorPass::run(Function &F,
                                     FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS=*/false);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}

/// Collect the uses of \p Alloca, looking through GEPs so that indirect users
/// are visited as well.
static void collectAllocaUses(AllocaInst &Alloca,
                              SmallVectorImpl<Use *> &Uses) {
  SmallVector<Instruction *, 4> WorkList({&Alloca});
  while (!WorkList.empty()) {
    auto *Cur = WorkList.pop_back_val();
    for (auto &U : Cur->uses()) {
      Uses.push_back(&U);

      if (isa<GetElementPtrInst>(U.getUser()))
        WorkList.push_back(cast<Instruction>(U.getUser()));
    }
  }
}

void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
    SmallVectorImpl<AllocaInst *> &Allocas) {
  DenseMap<AllocaInst *, unsigned> Scores;

  for (auto *Alloca : Allocas) {
    LLVM_DEBUG(dbgs() << "Scoring: " << *Alloca << "\n");
    unsigned &Score = Scores[Alloca];
    // Increment score by one for each user + a bonus for users within loops.
    SmallVector<Use *, 8> Uses;
    collectAllocaUses(*Alloca, Uses);
    for (auto *U : Uses) {
      Instruction *Inst = cast<Instruction>(U->getUser());
      if (isa<GetElementPtrInst>(Inst))
        continue;
      unsigned UserScore =
          1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
      LLVM_DEBUG(dbgs() << "  [+" << UserScore << "]:\t" << *Inst << "\n");
      Score += UserScore;
    }
    LLVM_DEBUG(dbgs() << "  => Final Score:" << Score << "\n");
  }

  stable_sort(Allocas, [&](AllocaInst *A, AllocaInst *B) {
    return Scores.at(A) > Scores.at(B);
  });

  // clang-format off
  LLVM_DEBUG(
    dbgs() << "Sorted Worklist:\n";
    for (auto *A: Allocas)
      dbgs() << "  " << *A << "\n";
  );
  // clang-format on
}

void AMDGPUPromoteAllocaImpl::setFunctionLimits(const Function &F) {
  // Load per-function limits, with the global cl::opts overriding them when
  // explicitly set on the command line. R600 keeps a smaller fixed cap on the
  // number of vector registers.
  const int R600MaxVectorRegs = 16;
  MaxVectorRegs = F.getFnAttributeAsParsedInteger(
      "amdgpu-promote-alloca-to-vector-max-regs",
      IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
  if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
    MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
  VGPRBudgetRatio = F.getFnAttributeAsParsedInteger(
      "amdgpu-promote-alloca-to-vector-vgpr-ratio",
      PromoteAllocaToVectorVGPRRatio);
  if (PromoteAllocaToVectorVGPRRatio.getNumOccurrences())
    VGPRBudgetRatio = PromoteAllocaToVectorVGPRRatio;
}

bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
  Mod = F.getParent();
  DL = &Mod->getDataLayout();

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(F);
  MaxVGPRs = getMaxVGPRs(CurrentLocalMemUsage, TM, F);
  setFunctionLimits(F);

  unsigned VectorizationBudget =
      (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
                                  : (MaxVGPRs * 32)) /
      VGPRBudgetRatio;

  SmallVector<AllocaInst *, 16> Allocas;
  for (Instruction &I : F.getEntryBlock()) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
      // Array allocations are probably not worth handling, since an allocation
      // of the array type is the canonical form.
      if (!AI->isStaticAlloca() || AI->isArrayAllocation())
        continue;
      Allocas.push_back(AI);
    }
  }

  sortAllocasToPromote(Allocas);

  bool Changed = false;
  for (AllocaInst *AI : Allocas) {
    const unsigned AllocaCost = DL->getTypeSizeInBits(AI->getAllocatedType());
    // First, check if we have enough budget to vectorize this alloca.
    if (AllocaCost <= VectorizationBudget) {
      // If we do, attempt vectorization, otherwise, fall through and try
      // promoting to LDS instead.
      if (tryPromoteAllocaToVector(*AI)) {
        Changed = true;
        assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
               "Underflow!");
        VectorizationBudget -= AllocaCost;
        LLVM_DEBUG(dbgs() << "  Remaining vectorization budget:"
                          << VectorizationBudget << "\n");
        continue;
      }
    } else {
      LLVM_DEBUG(dbgs() << "Alloca too big for vectorization (size:"
                        << AllocaCost << ", budget:" << VectorizationBudget
                        << "): " << *AI << "\n");
    }

    if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
      Changed = true;
  }

  // NOTE: tryPromoteAllocaToVector removes the alloca, so Allocas contains
  // dangling pointers past this point and must not be reused.

  return Changed;
}
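// Budget arithmetic in run() above, with illustrative numbers: a VGPR is 32
// bits wide, so with MaxVGPRs = 256 and the default VGPR ratio of 4 the
// vectorization budget is (256 * 32) / 4 = 2048 bits. AllocaCost is likewise
// measured in bits, so a single <64 x i32> alloca would consume that entire
// budget.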

// Constant indexes into the vector for the source and destination of a
// memcpy/memmove over a promoted alloca, filled in while scanning users.
struct MemTransferInfo {
  ConstantInt *SrcIndex = nullptr;
  ConstantInt *DestIndex = nullptr;
};

// Checks if the instruction I is a memset user of the alloca AI that we can
// deal with. Currently, only non-volatile memsets that affect the whole alloca
// are handled.
static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
                              const DataLayout &DL) {
  using namespace PatternMatch;
  // For now we only care about non-volatile memsets that affect the whole type
  // (start at index 0 and fill the whole alloca).
  const unsigned Size = DL.getTypeStoreSize(AI->getAllocatedType());
  return I->getOperand(0) == AI &&
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}

static Value *calculateVectorIndex(
    Value *Ptr, const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
  if (!GEP)
    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));

  auto I = GEPIdx.find(GEP);
  assert(I != GEPIdx.end() && "Must have entry for GEP!");

  Value *IndexValue = I->second;
  assert(IndexValue && "index value missing from GEP index map");
  return IndexValue;
}

/// Returns the index in the vector that \p GEP addresses relative to
/// \p Alloca, or nullptr if it cannot be computed.
static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                               Type *VecElemTy, const DataLayout &DL,
                               SmallVector<Instruction *> &NewInsts) {
  // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
  // helper.
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  APInt ConstOffset(BW, 0);

  // Walk backwards through nested GEPs to collect both constant and variable
  // offsets, so that nested GEPs over the alloca are handled as well.
  Value *CurPtr = GEP;
  while (auto *CurGEP = dyn_cast<GetElementPtrInst>(CurPtr)) {
    if (!CurGEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
      return nullptr;

    // Move to the outer pointer.
    CurPtr = CurGEP->getPointerOperand();
  }

  assert(CurPtr == Alloca && "GEP not based on alloca");

  int64_t VecElemSize = DL.getTypeAllocSize(VecElemTy);
  if (VarOffsets.size() > 1)
    return nullptr;

  LLVMContext &Ctx = GEP->getContext();
  APInt IndexQuot;
  int64_t Rem;
  APInt::sdivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
  if (Rem != 0)
    return nullptr;
  if (VarOffsets.size() == 0)
    return ConstantInt::get(Ctx, IndexQuot);

  IRBuilder<> Builder(GEP);

  const auto &VarOffset = VarOffsets.front();
  APInt OffsetQuot;
  APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
  if (Rem != 0 || OffsetQuot.isZero())
    return nullptr;

  Value *Offset = VarOffset.first;
  if (!isa<IntegerType>(Offset->getType()))
    return nullptr;

  // Sign-extend or truncate the variable offset to the index width, keeping
  // track of any newly created instructions so they can be erased on failure.
  Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
  if (Offset != VarOffset.first)
    if (auto *NewInst = dyn_cast<Instruction>(Offset))
      NewInsts.push_back(NewInst);

  if (!OffsetQuot.isOne()) {
    ConstantInt *ConstMul = ConstantInt::get(Ctx, OffsetQuot);
    Offset = Builder.CreateMul(Offset, ConstMul);
    if (auto *NewInst = dyn_cast<Instruction>(Offset))
      NewInsts.push_back(NewInst);
  }
  if (ConstOffset.isZero())
    return Offset;

  ConstantInt *ConstIndex = ConstantInt::get(Ctx, IndexQuot);
  Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
  if (auto *NewInst = dyn_cast<Instruction>(IndexAdd))
    NewInsts.push_back(NewInst);
  return IndexAdd;
}
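// Worked example for GEPToVectorIndex (illustrative values): with a
// <16 x i32> vector (VecElemSize = 4), a GEP whose collected offsets are one
// variable offset {V, 8} plus ConstOffset = 4 gives OffsetQuot = 2 and
// IndexQuot = 1, so the emitted index is (sext(V) * 2) + 1.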

/// Promotes a single user of the alloca to a vector form.
///
/// \param Inst           Instruction to be promoted.
/// \param DL             Module Data Layout.
/// \param VectorTy       Vectorized Type.
/// \param VecStoreSize   Size of \p VectorTy in bytes.
/// \param ElementSize    Size of \p VectorTy element type in bytes.
/// \param TransferInfo   MemTransferInst info map.
/// \param GEPVectorIdx   GEP -> VectorIdx cache.
/// \param GetCurVal      Returns the vector's current value (e.g. the last
///                       stored value), materializing a placeholder if needed.
/// \returns the stored value if \p Inst would have written to the alloca, or
/// nullptr otherwise.
static Value *promoteAllocaUserToVector(
    Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
    unsigned VecStoreSize, unsigned ElementSize,
    DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
    function_ref<Value *()> GetCurVal) {
  // Note: we use InstSimplifyFolder because it can leverage the DataLayout
  // to do more folding.
  IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
                                        InstSimplifyFolder(DL));
  Builder.SetInsertPoint(Inst);

  const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
                                                   Type *PtrTy) -> Value * {
    assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
    const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
    if (!PtrTy->isVectorTy())
      return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
    const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
    // For a vector of pointers, e.g. <2 x ptr>, cast through a vector of
    // equally-sized integers, e.g. <2 x i64>.
    assert((Size % NumPtrElts == 0) && "Vector size not divisble");
    Type *EltTy = Builder.getIntNTy(Size / NumPtrElts);
    return Builder.CreateBitOrPointerCast(
        Val, FixedVectorType::get(EltTy, NumPtrElts));
  };

  Type *VecEltTy = VectorTy->getElementType();

  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    Value *CurVal = GetCurVal();
    Value *Index = calculateVectorIndex(
        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);

    // We're loading the full vector.
    Type *AccessTy = Inst->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (Constant *CI = dyn_cast<Constant>(Index)) {
      if (CI->isZeroValue() && AccessSize == VecStoreSize) {
        if (AccessTy->isPtrOrPtrVectorTy())
          CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
        else if (CurVal->getType()->isPtrOrPtrVectorTy())
          CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
        Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
        Inst->replaceAllUsesWith(NewVal);
        return nullptr;
      }
    }

    // Loading a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
      const unsigned NumLoadedElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      Value *SubVec = PoisonValue::get(SubVecTy);
      for (unsigned K = 0; K < NumLoadedElts; ++K) {
        Value *CurIdx =
            Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
        SubVec = Builder.CreateInsertElement(
            SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
      }

      if (AccessTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
      else if (SubVecTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);

      SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
      Inst->replaceAllUsesWith(SubVec);
      return nullptr;
    }

    // We're loading one element.
    Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
    if (AccessTy != VecEltTy)
      ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);

    Inst->replaceAllUsesWith(ExtractElement);
    return nullptr;
  }
  case Instruction::Store: {
    // For stores, it depends on whether we're storing the full vector or not.
    // If we're storing the full vector, we don't need to know the current
    // value because we overwrite it entirely; for a single element we do.
    StoreInst *SI = cast<StoreInst>(Inst);
    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
    Value *Val = SI->getValueOperand();

    // We're storing the full vector.
    Type *AccessTy = Val->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (Constant *CI = dyn_cast<Constant>(Index)) {
      if (CI->isZeroValue() && AccessSize == VecStoreSize) {
        if (AccessTy->isPtrOrPtrVectorTy())
          Val = CreateTempPtrIntCast(Val, AccessTy);
        else if (VectorTy->isPtrOrPtrVectorTy())
          Val = CreateTempPtrIntCast(Val, VectorTy);
        return Builder.CreateBitOrPointerCast(Val, VectorTy);
      }
    }

    // Storing a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
      const unsigned NumWrittenElts =
          AccessSize / DL.getTypeStoreSize(VecEltTy);
      const unsigned NumVecElts = VectorTy->getNumElements();
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      if (SubVecTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, SubVecTy);
      else if (AccessTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, AccessTy);

      Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);

      Value *CurVec = GetCurVal();
      for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
           K < NumElts; ++K) {
        Value *CurIdx =
            Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
        CurVec = Builder.CreateInsertElement(
            CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
      }
      return CurVec;
    }

    if (Val->getType() != VecEltTy)
      Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
    return Builder.CreateInsertElement(GetCurVal(), Val, Index);
  }
  case Instruction::Call: {
    if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
      // For memcpy/memmove within the alloca, build a shuffle mask that moves
      // the copied range from the source index to the destination index.
      ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
      unsigned NumCopied = Length->getZExtValue() / ElementSize;
      MemTransferInfo *TI = &TransferInfo[MTI];
      unsigned SrcBegin = TI->SrcIndex->getZExtValue();
      unsigned DestBegin = TI->DestIndex->getZExtValue();

      SmallVector<int> Mask;
      for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
        if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
          Mask.push_back(SrcBegin < VectorTy->getNumElements()
                             ? SrcBegin++
                             : PoisonMaskElem);
        } else {
          Mask.push_back(Idx);
        }
      }

      return Builder.CreateShuffleVector(GetCurVal(), Mask);
    }

    if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
      // For memset, we don't need to know the previous value because we
      // currently only allow memsets that cover the whole alloca.
      Value *Elt = MSI->getOperand(1);
      const unsigned BytesPerElt = DL.getTypeStoreSize(VecEltTy);
      if (BytesPerElt > 1) {
        Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);

        // If the element type of the vector is a pointer, we need to first
        // cast to an equally-sized integer, then use inttoptr.
        if (VecEltTy->isPointerTy()) {
          Type *PtrInt = Builder.getIntNTy(BytesPerElt * 8);
          Elt = Builder.CreateBitCast(EltBytes, PtrInt);
          Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
        } else
          Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
      }

      return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
    }

    if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
      if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
        Intr->replaceAllUsesWith(
            Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
                            DL.getTypeAllocSize(VectorTy)));
        return nullptr;
      }
    }

    llvm_unreachable("Unsupported call when promoting alloca to vector");
  }

  default:
    llvm_unreachable("Inconsistency in instructions promotable to vector");
  }

  llvm_unreachable("Did not return after promoting instruction!");
}
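// Summary of the rewrites above: loads become extractelement (or a chain of
// extract/insert pairs for subvector accesses), stores become insertelement,
// a whole-alloca memset becomes a vector splat, and a memcpy/memmove within
// the alloca becomes a shufflevector whose mask shifts the copied range from
// SrcIndex to DestIndex while leaving the other lanes untouched.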

/// Returns true if an access of type \p AccessTy can be promoted given a
/// vectorized alloca of type \p VecTy.
static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
                                  const DataLayout &DL) {
  // Access as a vector type can work if the size of the access vector is a
  // multiple of the size of the alloca's vector element type.
  //
  // Examples:
  //    - VecTy = <8 x float>, AccessTy = <4 x float> -> OK
  //    - VecTy = <4 x double>, AccessTy = <2 x float> -> OK
  //    - VecTy = <8 x float>, AccessTy = <2 x i32>   -> OK
  if (isa<FixedVectorType>(AccessTy)) {
    TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
    // If the type size and the store size don't match, we would need to do
    // more than just a bitcast to translate between an extracted/insertable
    // subvector and the accessed value.
    if (AccTS * 8 != DL.getTypeSizeInBits(AccessTy))
      return false;
    return AccTS.isKnownMultipleOf(
        DL.getTypeStoreSize(VecTy->getElementType()));
  }

  return CastInst::isBitOrNoopPointerCastable(AccessTy,
                                              VecTy->getElementType(), DL);
}
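// Counter-example (illustrative): VecTy = <4 x i32> with AccessTy = <3 x i8>
// passes the store-size/bit-size check (3 bytes == 24 bits) but is rejected
// because 3 bytes is not a multiple of the 4-byte element store size.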

/// Iterates over an instruction worklist that may contain multiple
/// instructions from the same basic block, but in a different order.
template <typename InstContainer>
static void forEachWorkListItem(const InstContainer &WorkList,
                                std::function<void(Instruction *)> Fn) {
  // Bucket up uses of the alloca by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  DenseMap<BasicBlock *, SmallDenseSet<Instruction *>> UsesByBlock;
  for (Instruction *User : WorkList)
    UsesByBlock[User->getParent()].insert(User);

  for (Instruction *User : WorkList) {
    BasicBlock *BB = User->getParent();
    auto &BlockUses = UsesByBlock[BB];

    // Already processed, skip.
    if (BlockUses.empty())
      continue;

    // Only user in the block, directly process it.
    if (BlockUses.size() == 1) {
      Fn(User);
      continue;
    }

    // Multiple users in the block, walk the block to visit them in
    // instruction order.
    for (Instruction &Inst : *BB) {
      if (!BlockUses.contains(&Inst))
        continue;

      Fn(&Inst);
    }

    // Clear the block so we know it has been fully processed.
    BlockUses.clear();
  }
}
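// Processing users in instruction order within a block is what makes
// store-to-load forwarding correct: promoteAllocaUserToVector threads the
// running vector value from one user to the next, while SSAUpdater only
// resolves values across block boundaries.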

/// Find a vector type that the alloca's type can be turned into, if any.
FixedVectorType *
AMDGPUPromoteAllocaImpl::getVectorTypeForAlloca(Type *AllocaTy) const {
  if (DisablePromoteAllocaToVector) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to vectors is disabled\n");
    return nullptr;
  }

  auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
  if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
    uint64_t NumElems = 1;
    Type *ElemTy;
    do {
      NumElems *= ArrayTy->getNumElements();
      ElemTy = ArrayTy->getElementType();
    } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));

    // Check for array of vectors.
    auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
    if (InnerVectorTy) {
      NumElems *= InnerVectorTy->getNumElements();
      ElemTy = InnerVectorTy->getElementType();
    }

    if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
      unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
      if (ElementSize > 0) {
        unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
        // Expand vector if required to match padding of inner type,
        // i.e. odd size subvectors.
        // Storage size of new vector must match that of alloca for correct
        // behaviour of byte offsets and GEP computation.
        if (NumElems * ElementSize != AllocaSize)
          NumElems = AllocaSize / ElementSize;
        if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
          VectorTy = FixedVectorType::get(ElemTy, NumElems);
      }
    }
  }
  if (!VectorTy) {
    LLVM_DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return nullptr;
  }

  const unsigned MaxElements =
      (MaxVectorRegs * 32) / DL->getTypeSizeInBits(VectorTy->getElementType());

  if (VectorTy->getNumElements() > MaxElements ||
      VectorTy->getNumElements() < 2) {
    LLVM_DEBUG(dbgs() << "  " << *VectorTy
                      << " has an unsupported number of elements\n");
    return nullptr;
  }

  Type *VecEltTy = VectorTy->getElementType();
  unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
  if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
    LLVM_DEBUG(dbgs() << "  Cannot convert to vector if the allocation size "
                         "does not match the type's size\n");
    return nullptr;
  }

  return VectorTy;
}
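// Flattening example (illustrative): an alloca of [4 x <2 x float>] flattens
// to NumElems = 4 * 2 = 8 with ElemTy = float, i.e. <8 x float>, provided 8
// does not exceed MaxElements; with 32-bit elements, MaxElements equals
// MaxVectorRegs.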

bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
  LLVM_DEBUG(dbgs() << "Trying to promote to vectors: " << Alloca << '\n');

  Type *AllocaTy = Alloca.getAllocatedType();
  FixedVectorType *VectorTy = getVectorTypeForAlloca(AllocaTy);
  if (!VectorTy)
    return false;

  std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
  SmallVector<Instruction *> WorkList;
  SmallVector<Instruction *> UsersToRemove;
  SmallVector<Instruction *> DeferredInsts;
  SmallVector<Instruction *> NewGEPInsts;
  DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;

  const auto RejectUser = [&](Instruction *Inst, Twine Msg) {
    LLVM_DEBUG(dbgs() << "  Cannot promote alloca to vector: " << Msg << "\n"
                      << "    " << *Inst << "\n");
    for (auto *Inst : reverse(NewGEPInsts))
      Inst->eraseFromParent();
    return false;
  };

  SmallVector<Use *, 8> Uses;
  collectAllocaUses(Alloca, Uses);

  LLVM_DEBUG(dbgs() << "  Attempting promotion to: " << *VectorTy << "\n");

  Type *VecEltTy = VectorTy->getElementType();
  unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
  assert(ElementSize > 0);
  for (auto *U : Uses) {
    Instruction *Inst = cast<Instruction>(U->getUser());

    if (Value *Ptr = getLoadStorePointerOperand(Inst)) {
      // This is a store of the pointer, not to the pointer.
      if (isa<StoreInst>(Inst) &&
          U->getOperandNo() != StoreInst::getPointerOperandIndex())
        return RejectUser(Inst, "pointer is being stored");

      Type *AccessTy = getLoadStoreType(Inst);
      if (AccessTy->isAggregateType())
        return RejectUser(Inst, "unsupported load/store as aggregate");

      // Check that this is a simple access of a vector element.
      bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
                                          : cast<StoreInst>(Inst)->isSimple();
      if (!IsSimple)
        return RejectUser(Inst, "not a simple load or store");

      Ptr = Ptr->stripPointerCasts();

      // Alloca already accessed as vector.
      if (Ptr == &Alloca && DL->getTypeStoreSize(Alloca.getAllocatedType()) ==
                                DL->getTypeStoreSize(AccessTy)) {
        WorkList.push_back(Inst);
        continue;
      }

      if (!isSupportedAccessType(VectorTy, AccessTy, *DL))
        return RejectUser(Inst, "not a supported access type");

      WorkList.push_back(Inst);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
      // If we can't compute a vector index from this GEP, then we can't
      // promote this alloca to vector.
      Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL, NewGEPInsts);
      if (!Index)
        return RejectUser(Inst, "cannot compute vector index for GEP");

      GEPVectorIdx[GEP] = Index;
      UsersToRemove.push_back(Inst);
      continue;
    }

    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
        MSI && isSupportedMemset(MSI, &Alloca, *DL)) {
      WorkList.push_back(Inst);
      continue;
    }

    if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
      if (TransferInst->isVolatile())
        return RejectUser(Inst, "mem transfer inst is volatile");

      ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
      if (!Len || (Len->getZExtValue() % ElementSize))
        return RejectUser(Inst, "mem transfer inst length is non-constant or "
                                "not a multiple of the vector element size");

      if (TransferInfo.try_emplace(TransferInst).second) {
        DeferredInsts.push_back(Inst);
        WorkList.push_back(Inst);
      }

      auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
        if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
          return nullptr;

        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
      };

      unsigned OpNum = U->getOperandNo();
      MemTransferInfo *TI = &TransferInfo[TransferInst];
      if (OpNum == 0) {
        Value *Dest = TransferInst->getDest();
        ConstantInt *Index = getPointerIndexOfAlloca(Dest);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant dest index");
        TI->DestIndex = Index;
      } else {
        assert(OpNum == 1);
        Value *Src = TransferInst->getSource();
        ConstantInt *Index = getPointerIndexOfAlloca(Src);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant src index");
        TI->SrcIndex = Index;
      }
      continue;
    }

    if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
      if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
        WorkList.push_back(Inst);
        continue;
      }
    }

    // Ignore assume-like intrinsics and comparisons used in assumes.
    if (isAssumeLikeIntrinsic(Inst)) {
      if (!Inst->use_empty())
        return RejectUser(Inst, "assume-like intrinsic cannot have any users");
      UsersToRemove.push_back(Inst);
      continue;
    }

    if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
          return isAssumeLikeIntrinsic(cast<Instruction>(U));
        })) {
      UsersToRemove.push_back(Inst);
      continue;
    }

    return RejectUser(Inst, "unhandled alloca user");
  }

  while (!DeferredInsts.empty()) {
    Instruction *Inst = DeferredInsts.pop_back_val();
    MemTransferInst *TransferInst = cast<MemTransferInst>(Inst);
    // TODO: Support the case if the pointers are from different alloca or
    // from different address spaces.
    MemTransferInfo &Info = TransferInfo[TransferInst];
    if (!Info.SrcIndex || !Info.DestIndex)
      return RejectUser(
          Inst, "mem transfer inst is missing constant src and/or dst index");
  }

  LLVM_DEBUG(dbgs() << "  Converting alloca to vector " << *AllocaTy << " -> "
                    << *VectorTy << '\n');
  const unsigned VecStoreSize = DL->getTypeStoreSize(VectorTy);

  // Alloca is uninitialized memory. Imitate that by making the first value
  // a frozen poison vector.
  SSAUpdater Updater;
  Updater.Initialize(VectorTy, "promotealloca");

  BasicBlock *EntryBB = Alloca.getParent();
  Value *AllocaInitValue =
      new FreezeInst(PoisonValue::get(VectorTy), "", Alloca.getIterator());
  AllocaInitValue->takeName(&Alloca);
  Updater.AddAvailableValue(EntryBB, AllocaInitValue);

  // Rewrite all users. When the current value of the vector is needed but is
  // not yet known for the block, insert a placeholder instruction that is
  // replaced once all blocks have been visited and SSA construction can
  // provide the real value.
  SmallVector<Instruction *> Placeholders;
  forEachWorkListItem(WorkList, [&](Instruction *I) {
    BasicBlock *BB = I->getParent();

    auto GetCurVal = [&]() -> Value * {
      if (Value *CurVal = Updater.FindValueForBlock(BB))
        return CurVal;

      if (!Placeholders.empty() && Placeholders.back()->getParent() == BB)
        return Placeholders.back();

      Instruction *Placeholder =
          new FreezeInst(PoisonValue::get(VectorTy), "", I->getIterator());
      Placeholders.push_back(Placeholder);
      return Placeholders.back();
    };

    Value *Result = promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize,
                                              ElementSize, TransferInfo,
                                              GEPVectorIdx, GetCurVal);
    if (Result)
      Updater.AddAvailableValue(BB, Result);
  });

  // Now that every block has been visited, replace the placeholders with the
  // value the SSAUpdater computes for their block.
  for (Instruction *Placeholder : Placeholders) {
    Placeholder->replaceAllUsesWith(
        Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
    Placeholder->eraseFromParent();
  }

  // Delete all the users that were rewritten into vector operations.
  for (Instruction *I : WorkList) {
    assert(I->use_empty());
    I->eraseFromParent();
  }

  // Delete all the users that are known to be removed (GEPs folded into
  // vector indices, assume-like intrinsics and their comparisons).
  for (Instruction *I : reverse(UsersToRemove)) {
    I->dropDroppableUses();
    assert(I->use_empty());
    I->eraseFromParent();
  }

  // The alloca itself is now dead.
  Alloca.eraseFromParent();
  return true;
}
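// Illustrative before/after for the whole transform (hand-written, not taken
// from a regression test):
//
//   %stack = alloca [4 x i32], align 4, addrspace(5)
//   %gep = getelementptr [4 x i32], ptr addrspace(5) %stack, i32 0, i32 %i
//   store i32 %v, ptr addrspace(5) %gep
//   %r = load i32, ptr addrspace(5) %gep
//
// becomes roughly:
//
//   %stack = freeze <4 x i32> poison
//   %vec = insertelement <4 x i32> %stack, i32 %v, i32 %i
//   %r = extractelement <4 x i32> %vec, i32 %i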

std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
  Function &F = *Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  if (!IsAMDHSA) {
    CallInst *LocalSizeY =
        Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_y, {});
    CallInst *LocalSizeZ =
        Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_z, {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::pair(LocalSizeY, LocalSizeZ);
  }

  // On AMDHSA, read the workgroup size out of the dispatch pointer. We are
  // indexing into this struct and want to extract the workgroup_size_* fields:
  //
  //   typedef struct hsa_kernel_dispatch_packet_s {
  //     uint16_t header;
  //     uint16_t setup;
  //     uint16_t workgroup_size_x;
  //     uint16_t workgroup_size_y;
  //     uint16_t workgroup_size_z;
  //     uint16_t reserved0;
  //     uint32_t grid_size_x;
  //     uint32_t grid_size_y;
  //     uint32_t grid_size_z;
  //     ...
  //   } hsa_kernel_dispatch_packet_t;
  //
  CallInst *DispatchPtr =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_dispatch_ptr, {});
  DispatchPtr->addRetAttr(Attribute::NoAlias);
  DispatchPtr->addRetAttr(Attribute::NonNull);
  F.removeFnAttr("amdgpu-no-dispatch-ptr");

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableRetAttr(64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());

  // We could do a single 64-bit load here, but it's likely that the basic
  // 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergeable later anyway.
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, DispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, DispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));

  MDNode *MD = MDNode::get(Mod->getContext(), {});
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract y component. Upper half of LoadZU should be zero already.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::pair(Y, LoadZU);
}
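// Layout note for the loads above: LoadXY covers bytes 4-7 of the packet
// ({workgroup_size_x, workgroup_size_y}), so y is recovered with a 16-bit
// right shift, while LoadZU covers bytes 8-11 ({workgroup_size_z, reserved0});
// since reserved0 must be zero, LoadZU already holds z zero-extended to 32
// bits.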

Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
                                              unsigned N) {
  Function *F = Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
  StringRef AttrName;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
    AttrName = "amdgpu-no-workitem-id-x";
    break;
  case 1:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
    AttrName = "amdgpu-no-workitem-id-y";
    break;

  case 2:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
    AttrName = "amdgpu-no-workitem-id-z";
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getOrInsertDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);
  F->removeFnAttr(AttrName);

  return CI;
}

static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}

bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
    Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
    int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = getUnderlyingObject(OtherOp);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // TODO: We should be able to replace undefs with the right pointer type.

  // TODO: If we know the other base object is another promotable
  // alloca, not necessarily this alloca, we can do this. The
  // important part is both must have the same address space at
  // uses.
  if (OtherObj != BaseAlloca) {
    LLVM_DEBUG(
        dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}

bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
    Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {

  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
      continue;
    }

    if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
      continue;
    }

    if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
      continue;
    }

    // Only promote a comparison if we know that the other operand is derived
    // from the same alloca.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds of
      // the alloca.
      if (!GEP->isInBounds())
        return false;
    } else if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      // Only promote a select if we know that the other select operand
      // is from another pointer that will also be promoted.
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    } else if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // Repeat for phis.

      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    } else {
      // Only GEPs, selects and phis among the remaining instruction kinds can
      // be rewritten to the new pointer type; anything else (e.g. an
      // addrspacecast) may let the private pointer escape.
      return false;
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}

bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                           "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getAddressableLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  SmallVector<const Constant *, 16> Stack;
  SmallPtrSet<const Constant *, 8> VisitedConstants;
  SmallPtrSet<const GlobalVariable *, 8> UsedLDS;

  auto visitUsers = [&](const GlobalVariable *GV, const Constant *Val) -> bool {
    for (const User *U : Val->users()) {
      if (const Instruction *Use = dyn_cast<Instruction>(U)) {
        if (Use->getFunction() == &F)
          return true;
      } else {
        const Constant *C = cast<Constant>(U);
        if (VisitedConstants.insert(C).second)
          Stack.push_back(C);
      }
    }

    return false;
  };

  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    if (visitUsers(&GV, &GV)) {
      UsedLDS.insert(&GV);
      Stack.clear();
      continue;
    }

    // For any ConstantExpr uses, we need to recursively search the users until
    // we see a function.
    while (!Stack.empty()) {
      const Constant *C = Stack.pop_back_val();
      if (visitUsers(&GV, C)) {
        UsedLDS.insert(&GV);
        Stack.clear();
        break;
      }
    }
  }

  const DataLayout &DL = Mod->getDataLayout();
  SmallVector<std::pair<uint64_t, Align>, 16> AllocatedSizes;
  AllocatedSizes.reserve(UsedLDS.size());

  for (const GlobalVariable *GV : UsedLDS) {
    Align Alignment =
        DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
    uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());

    // HIP uses an extern unsized array in local address space for dynamically
    // allocated shared memory. In that case, we have to disable the promotion.
    if (GV->hasExternalLinkage() && AllocSize == 0) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
                           "local memory. Promoting to local memory "
                           "disabled.\n");
      return false;
    }

    AllocatedSizes.emplace_back(AllocSize, Alignment);
  }

  // Sort to try to estimate the worst case alignment padding.
  //
  // FIXME: We should really do something to fix the addresses to a more
  // optimal value instead.
  llvm::sort(AllocatedSizes, llvm::less_second());

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;

  // FIXME: Try to account for padding here. The real padding and address is
  // currently determined from the inverse order of uses in the function when
  // legalizing, which could also potentially change. We try to estimate the
  // worst case here, but we probably should fix the addresses earlier.
  for (auto Alloc : AllocatedSizes) {
    CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alloc.second);
    CurrentLocalMemUsage += Alloc.first;
  }

  unsigned MaxOccupancy =
      ST.getWavesPerEU(ST.getFlatWorkGroupSizes(F), CurrentLocalMemUsage, F)
          .second;

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount =
      ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);

  // Program may already use more LDS than is usable at maximum occupancy.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
                    << " bytes of LDS\n"
                    << "  Rounding size to " << MaxSizeWithWaveCount
                    << " with a maximum occupancy of " << MaxOccupancy << '\n'
                    << " and " << (LocalMemLimit - CurrentLocalMemUsage)
                    << " available for promotion\n");

  return true;
}
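// Net effect of the computation above: LocalMemLimit is not the raw
// addressable LDS size but the largest LDS tier still compatible with the
// occupancy the kernel already achieves, so promoting allocas to LDS never
// reduces the kernel's wave count.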

// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
                                                    bool SufficientLDS) {
  LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');

  if (DisablePromoteAllocaToLDS) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to LDS is disabled\n");
    return false;
  }

  const DataLayout &DL = Mod->getDataLayout();
  IRBuilder<> Builder(&I);

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the
  // work item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    LLVM_DEBUG(
        dbgs()
        << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
  uint32_t AllocSize =
      WorkGroupSize * DL.getTypeAllocSize(I.getAllocatedType());
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    LLVM_DEBUG(dbgs() << "  " << AllocSize
                      << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value *> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlign());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  LLVMContext &Context = Mod->getContext();
  Value *Indices[] = {Constant::getNullValue(Type::getInt32Ty(Context)), TID};

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  SmallVector<IntrinsicInst *> DeferredIntrs;

  PointerType *NewPtrTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *LHS = CI->getOperand(0);
        Value *RHS = CI->getOperand(1);

        Type *NewTy = LHS->getType()->getWithNewType(NewPtrTy);
        if (isa<ConstantPointerNull>(LHS))
          CI->setOperand(0, Constant::getNullValue(NewTy));

        if (isa<ConstantPointerNull>(RHS))
          CI->setOperand(1, Constant::getNullValue(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      assert(V->getType()->isPtrOrPtrVectorTy());

      Type *NewTy = V->getType()->getWithNewType(NewPtrTy);
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, Constant::getNullValue(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, Constant::getNullValue(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, Constant::getNullValue(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      // These have 2 pointer operands. In case if second pointer also needs
      // to be replaced we defer processing of these intrinsics until all
      // other values are processed.
      DeferredIntrs.push_back(Intr);
      continue;
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlign(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group: {
      // Re-create the intrinsic with a declaration mangled for the new local
      // pointer type; the pointer is the last operand for all four of these.
      SmallVector<Value *> Args;
      if (Intr->getIntrinsicID() == Intrinsic::invariant_start) {
        Args.emplace_back(Intr->getArgOperand(0));
      } else if (Intr->getIntrinsicID() == Intrinsic::invariant_end) {
        Args.emplace_back(Intr->getArgOperand(0));
        Args.emplace_back(Intr->getArgOperand(1));
      }
      Value *Ptr = Intr->getArgOperand(Intr->arg_size() - 1);
      Args.emplace_back(Ptr);

      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          Mod, Intr->getIntrinsicID(), {Ptr->getType()});
      CallInst *NewIntr =
          CallInst::Create(NewDecl, Args, Intr->getName(), Intr->getIterator());
      Intr->mutateType(NewIntr->getType());
      Intr->replaceAllUsesWith(NewIntr);
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);

      CallInst *NewCall = Builder.CreateIntrinsic(
          Intrinsic::objectsize,
          {Intr->getType(), PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS)},
          {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }

  for (IntrinsicInst *Intr : DeferredIntrs) {
    Builder.SetInsertPoint(Intr);
    Intrinsic::ID ID = Intr->getIntrinsicID();
    assert(ID == Intrinsic::memcpy || ID == Intrinsic::memmove);

    MemTransferInst *MI = cast<MemTransferInst>(Intr);
    auto *B = Builder.CreateMemTransferInst(
        ID, MI->getRawDest(), MI->getDestAlign(), MI->getRawSource(),
        MI->getSourceAlign(), MI->getLength(), MI->isVolatile());

    for (unsigned I = 0; I != 2; ++I) {
      if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I))
        B->addDereferenceableParamAttr(I, Bytes);
    }

    Intr->eraseFromParent();
  }

  return true;
}
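// Addressing sketch for the LDS promotion above: each workitem owns one slot
// of the [WorkGroupSize x <alloca type>] LDS array, indexed by its linearized
// id TID = tid.x * cnt.y * cnt.z + tid.y * cnt.z + tid.z, which is exactly
// what the CreateMul/CreateAdd chain computes before the GEP.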
