LLVM: lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp Source File

104#include "llvm/IR/IntrinsicsAMDGPU.h"

111

112#include

113

114#define DEBUG_TYPE "amdgpu-sw-lower-lds"

115#define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15

116

117using namespace llvm;

118using namespace AMDGPU;

119

120namespace {

121

123 AsanInstrumentLDS("amdgpu-asan-instrument-lds",

124 cl::desc("Run asan instrumentation on LDS instructions "

125 "lowered to global memory"),

127

129

struct LDSAccessTypeInfo {
};

struct KernelLDSParameters {
  LDSAccessTypeInfo DirectAccess;
  LDSAccessTypeInfo IndirectAccess;
      LDSToReplacementIndicesMap;
};

struct NonKernelLDSParameters {
};

struct AsanInstrumentInfo {
  int Scale = 0;
};

struct FunctionsAndLDSAccess {
};
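// Context for the class that follows (summary inferred from the surviving
// code, not copied from the original file comments): this pass implements the
// "software" lowering of LDS used when AddressSanitizer is enabled on AMDGPU.
// Each kernel's LDS variables are backed by a global-memory buffer that the
// kernel allocates on entry and frees on exit, so ASan can observe and
// instrument what would otherwise be unsanitized LDS traffic.  The methods
// below build the per-kernel software-LDS global and its metadata, rewrite
// LDS accesses in kernels and in reachable non-kernel functions, and insert
// the allocation, redzone-poisoning, and free calls.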

class AMDGPUSwLowerLDS {
public:
                   DomTreeCallback Callback)
      : M(Mod), AMDGPUTM(TM), IRB(M.getContext()), DTCallback(Callback) {}
  bool run();
  void getUsesOfLDSByNonKernels();
  void getNonKernelsWithLDSArguments(const CallGraph &CG);
  void buildSwLDSGlobal(Function *Func);
  void buildSwDynLDSGlobal(Function *Func);
  void populateSwMetadataGlobal(Function *Func);
  void populateSwLDSAttributeAndMetadata(Function *Func);
  void populateLDSToReplacementIndicesMap(Function *Func);
  void getLDSMemoryInstructions(Function *Func,
  void replaceKernelLDSAccesses(Function *Func);
  Value *getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, Value *LDSPtr);
  void translateLDSMemoryOperationsToGlobalMemory(
  void poisonRedzones(Function *Func, Value *MallocPtr);
  void buildNonKernelLDSOffsetTable(NonKernelLDSParameters &NKLDSParams);
  void buildNonKernelLDSBaseTable(NonKernelLDSParameters &NKLDSParams);
  getAddressesOfVariablesInKernel(Function *Func,
  void lowerNonKernelLDSAccesses(Function *Func,
                                 NonKernelLDSParameters &NKLDSParams);
  void
  updateMallocSizeForDynamicLDS(Function *Func, Value **CurrMallocSize,
                                Value *HiddenDynLDSSize,
  void initAsanInfo();

private:
  DomTreeCallback DTCallback;
  FunctionsAndLDSAccess FuncLDSAccessInfo;
  AsanInstrumentInfo AsanInfo;
};

template <typename T> SetVector<T> sortByName(std::vector<T> &&V) {
  sort(V, [](const auto *L, const auto *R) {
    return L->getName() < R->getName();
  });
}

  return sortByName(
      std::vector<GlobalVariable *>(Variables.begin(), Variables.end()));
}

  if (Kernels.size() > UINT32_MAX) {
    report_fatal_error("Unimplemented SW LDS lowering for > 2**32 kernels");
  }
      sortByName(std::vector<Function *>(Kernels.begin(), Kernels.end()));
  for (size_t i = 0; i < Kernels.size(); i++) {
    };
    Function *Func = OrderedKernels[i];
    Func->setMetadata("llvm.amdgcn.lds.kernel.id",
  }
  return OrderedKernels;
}

void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
  for (auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) {
    if (!CGN)
      continue;
    for (auto &I : *CGN) {
        continue;
        continue;
      for (auto AI = CalledFunc->arg_begin(), E = CalledFunc->arg_end();
           AI != E; ++AI) {
        Type *ArgTy = (*AI).getType();
          continue;
          continue;
        FuncLDSAccessInfo.NonKernelsWithLDSArgument.insert(CalledFunc);
        FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.insert(Func);
      }
    }
  }
}

void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
  for (GlobalVariable *GV : FuncLDSAccessInfo.AllNonKernelLDSAccess) {
      continue;
        if (isKernel(*F) && F->isDeclaration())
          FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
      }
    }
  }
}

  MDNode *MetadataNode = MDB.createRange(ConstantInt::get(IntTy, Address),
                                         ConstantInt::get(IntTy, Address + 1));
  GV->setMetadata(LLVMContext::MD_absolute_symbol, MetadataNode);
}

                                bool IsDynLDS) {
  std::string Buffer;
  if (IsDynLDS)
    Func->addFnAttr("amdgpu-lds-size", Buffer);
  }
}

  BasicBlock *Entry = &Func->getEntryBlock();
  IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
      Intrinsic::donothing, {});
  Value *UseInstance[1] = {
      Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};
  Builder.CreateCall(Decl, {},
}

void AMDGPUSwLowerLDS::buildSwLDSGlobal(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  LDSParams.SwLDS->setSanitizerMetadata(MD);
}

void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  if (LDSParams.DirectAccess.DynamicLDSGlobals.empty() &&
      LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
    return;
      "llvm.amdgcn." + Func->getName() + ".dynlds", nullptr,
  markUsedByKernel(Func, LDSParams.SwDynLDS);
  LDSParams.SwDynLDS->setSanitizerMetadata(MD);
}

void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  bool IsDynLDSUsed = LDSParams.SwDynLDS;
  recordLDSAbsoluteAddress(M, LDSParams.SwLDS, 0);
  addLDSSizeAttribute(Func, Offset, IsDynLDSUsed);
  if (LDSParams.SwDynLDS)
    recordLDSAbsoluteAddress(M, LDSParams.SwDynLDS, Offset);
}

void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  auto &Ctx = M.getContext();
  auto &DL = M.getDataLayout();
  std::vector<Type *> Items;
  std::vector<Constant *> Initializers;
  Align MaxAlignment(1);
  auto UpdateMaxAlignment = [&MaxAlignment, &DL](GlobalVariable *GV) {
    MaxAlignment = std::max(MaxAlignment, GVAlign);
  };

  for (GlobalVariable *GV : LDSParams.DirectAccess.StaticLDSGlobals)
    UpdateMaxAlignment(GV);

  for (GlobalVariable *GV : LDSParams.DirectAccess.DynamicLDSGlobals)
    UpdateMaxAlignment(GV);

  for (GlobalVariable *GV : LDSParams.IndirectAccess.StaticLDSGlobals)
    UpdateMaxAlignment(GV);

  for (GlobalVariable *GV : LDSParams.IndirectAccess.DynamicLDSGlobals)
    UpdateMaxAlignment(GV);

  MDItemOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.item";

  uint32_t &MallocSize = LDSParams.MallocSize;
  int AsanScale = AsanInfo.Scale;
  auto buildInitializerForSwLDSMD =
    for (auto &GV : LDSGlobals) {
        continue;
      UniqueLDSGlobals.insert(GV);
      const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
      Items.push_back(LDSItemTy);
      Constant *ItemStartOffset = ConstantInt::get(Int32Ty, MallocSize);
      Constant *SizeInBytesConst = ConstantInt::get(Int32Ty, SizeInBytes);
      const uint64_t RightRedzoneSize =
      MallocSize += SizeInBytes;
      LDSParams.RedzoneOffsetAndSizeVector.emplace_back(MallocSize,
                                                        RightRedzoneSize);
      MallocSize += RightRedzoneSize;
          alignTo(SizeInBytes + RightRedzoneSize, MaxAlignment);
      Constant *AlignedSizeInBytesConst =
          ConstantInt::get(Int32Ty, AlignedSize);
      MallocSize = alignTo(MallocSize, MaxAlignment);
                                      AlignedSizeInBytesConst});
      Initializers.push_back(InitItem);
    }
  };
  SwLDSVector.insert(LDSParams.SwLDS);
  buildInitializerForSwLDSMD(SwLDSVector);
  buildInitializerForSwLDSMD(LDSParams.DirectAccess.StaticLDSGlobals);
  buildInitializerForSwLDSMD(LDSParams.IndirectAccess.StaticLDSGlobals);
  buildInitializerForSwLDSMD(LDSParams.DirectAccess.DynamicLDSGlobals);
  buildInitializerForSwLDSMD(LDSParams.IndirectAccess.DynamicLDSGlobals);

  Type *Ty = LDSParams.SwLDS->getValueType();
  const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
  uint64_t AlignedSize = alignTo(SizeInBytes, MaxAlignment);
  LDSParams.LDSSize = AlignedSize;
  MDTypeOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.type";
  MDOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md";
  LDSParams.SwLDSMetadata->setInitializer(data);
  assert(LDSParams.SwLDS);

  LDSParams.SwLDS->setAlignment(MaxAlignment);
  if (LDSParams.SwDynLDS)
    LDSParams.SwDynLDS->setAlignment(MaxAlignment);
  LDSParams.SwLDSMetadata->setSanitizerMetadata(MD);
}
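// The metadata global built above ("llvm.amdgcn.sw.lds.<kernel>.md") is an
// array-of-structs with one entry per LDS variable.  Judging from the three
// i32 constants pushed per item and the GEP field indices (0, 1, 2) used
// later, each entry is laid out roughly like this illustrative C++ struct
// (the field names are assumptions, not taken from the source):
//
//   struct SwLDSMDItem {
//     uint32_t Offset;      // byte offset of the variable in the malloc'ed block
//     uint32_t Size;        // un-padded size of the variable in bytes
//     uint32_t AlignedSize; // size plus right redzone, rounded to MaxAlignment
//   };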

void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
    for (auto &GV : LDSGlobals) {
        continue;
      UniqueLDSGlobals.insert(GV);
      LDSParams.LDSToReplacementIndicesMap[GV] = {0, Idx, 0};
      ++Idx;
    }
  };
  SwLDSVector.insert(LDSParams.SwLDS);
  PopulateIndices(SwLDSVector, Idx);
  PopulateIndices(LDSParams.DirectAccess.StaticLDSGlobals, Idx);
  PopulateIndices(LDSParams.IndirectAccess.StaticLDSGlobals, Idx);
  PopulateIndices(LDSParams.DirectAccess.DynamicLDSGlobals, Idx);
  PopulateIndices(LDSParams.IndirectAccess.DynamicLDSGlobals, Idx);
}

                                           Value *Replacement) {
  auto ReplaceUsesLambda = [Func](const Use &U) -> bool {
    auto *V = U.getUser();
      auto *Func1 = Inst->getFunction();
      if (Func == Func1)
        return true;
    }
    return false;
  };
}

void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
  assert(SwLDSMetadata);
  StructType *SwLDSMetadataStructType =
  auto &IndirectAccess = LDSParams.IndirectAccess;
  auto &DirectAccess = LDSParams.DirectAccess;
    for (auto &GV : LDSGlobals) {
      if ((IndirectAccess.StaticLDSGlobals.contains(GV) ||
           IndirectAccess.DynamicLDSGlobals.contains(GV)) &&
          (!DirectAccess.StaticLDSGlobals.contains(GV) &&
           !DirectAccess.DynamicLDSGlobals.contains(GV)))
        continue;
        continue;
      UniqueLDSGlobals.insert(GV);
      auto &Indices = LDSParams.LDSToReplacementIndicesMap[GV];
      assert(Indices.size() == 3);
      Constant *GEPIdx[] = {ConstantInt::get(Int32Ty, Indices[0]),
                            ConstantInt::get(Int32Ty, Indices[1]),
                            ConstantInt::get(Int32Ty, Indices[2])};
          SwLDSMetadataStructType, SwLDSMetadata, GEPIdx, true);
      Value *BasePlusOffset =
                             false));
      replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
    }
  };
  ReplaceLDSGlobalUses(DirectAccess.StaticLDSGlobals);
  ReplaceLDSGlobalUses(IndirectAccess.StaticLDSGlobals);
  ReplaceLDSGlobalUses(DirectAccess.DynamicLDSGlobals);
  ReplaceLDSGlobalUses(IndirectAccess.DynamicLDSGlobals);
}

void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS(
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
  assert(SwLDS && SwLDSMetadata);
  unsigned MaxAlignment = SwLDS->getAlignment();
  Value *MaxAlignValue = IRB.getInt32(MaxAlignment);
  Value *MaxAlignValueMinusOne = IRB.getInt32(MaxAlignment - 1);

    auto &Indices = LDSParams.LDSToReplacementIndicesMap[DynGV];
    Constant *Index1 = ConstantInt::get(Int32Ty, Indices[1]);
        MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset});
    IRB.CreateStore(*CurrMallocSize, GEPForOffset);

    auto *GEPForSize = IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
                                             {Index0, Index1, Index2Size});
    IRB.CreateStore(CurrDynLDSSize, GEPForSize);
    Constant *Index2AlignedSize = ConstantInt::get(Int32Ty, 2);
        MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize});

    Value *AlignedDynLDSSize =
        IRB.CreateAdd(CurrDynLDSSize, MaxAlignValueMinusOne);
    AlignedDynLDSSize = IRB.CreateUDiv(AlignedDynLDSSize, MaxAlignValue);
    AlignedDynLDSSize = IRB.CreateMul(AlignedDynLDSSize, MaxAlignValue);
    IRB.CreateStore(AlignedDynLDSSize, GEPForAlignedSize);

    *CurrMallocSize = IRB.CreateAdd(*CurrMallocSize, AlignedDynLDSSize);
  }
}
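// The add/udiv/mul sequence emitted above is the usual integer round-up to a
// multiple of MaxAlignment.  As plain host-side arithmetic (illustration only,
// not code from this pass):
//
//   constexpr uint32_t alignUp(uint32_t Size, uint32_t Align) {
//     return ((Size + Align - 1) / Align) * Align; // e.g. alignUp(10, 8) == 16
//   }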

  assert(InsertBefore);
  if (SP)
    return DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
}

void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
        LDSInstructions.insert(&Inst);
        LDSInstructions.insert(&Inst);
        LDSInstructions.insert(&Inst);
        LDSInstructions.insert(&Inst);
        LDSInstructions.insert(&Inst);
      } else
        continue;
    }
  }
}

Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr,
  assert(LDSPtr && "Invalid LDS pointer operand");
    ElementCount NumElements = VecPtrTy->getElementCount();
  }
}
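// What the translation above amounts to (summary inferred from the surviving
// code, with an illustrative snippet that is not verbatim from this file): the
// kernel's software-LDS global is pinned at LDS absolute address 0 by
// recordLDSAbsoluteAddress(), so after replaceKernelLDSAccesses() an LDS
// pointer is effectively a byte offset into the malloc'ed buffer.  Rewriting
// it to a global-memory pointer is then roughly:
//
//   Value *Offset = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty());
//   Value *GlobalPtr =
//       IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {Offset});
//
// with the vector-of-pointers case handled per element, as the surviving
// ElementCount logic suggests.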

void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
  LLVM_DEBUG(dbgs() << "Translating LDS memory operations to global memory : "
                    << Func->getName());
  for (Instruction *Inst : LDSInstructions) {
      Value *LIOperand = LI->getPointerOperand();
      Value *Replacement =
          getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand);
                                        LI->getAlign(), LI->isVolatile());
      NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
      AsanInfo.Instructions.insert(NewLI);
      LI->replaceAllUsesWith(NewLI);
      LI->eraseFromParent();
      Value *SIOperand = SI->getPointerOperand();
      Value *Replacement =
          getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand);
          SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile());
      NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
      AsanInfo.Instructions.insert(NewSI);
      SI->replaceAllUsesWith(NewSI);
      SI->eraseFromParent();
      Value *RMWPtrOperand = RMW->getPointerOperand();
      Value *RMWValOperand = RMW->getValOperand();
      Value *Replacement =
          getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand);
          RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
          RMW->getOrdering(), RMW->getSyncScopeID());
      AsanInfo.Instructions.insert(NewRMW);
      RMW->replaceAllUsesWith(NewRMW);
      RMW->eraseFromParent();
      Value *XCHGPtrOperand = XCHG->getPointerOperand();
      Value *Replacement =
          getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand);
          Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
          XCHG->getAlign(), XCHG->getSuccessOrdering(),
          XCHG->getFailureOrdering(), XCHG->getSyncScopeID());
      NewXCHG->setVolatile(XCHG->isVolatile());
      AsanInfo.Instructions.insert(NewXCHG);
      XCHG->replaceAllUsesWith(NewXCHG);
      XCHG->eraseFromParent();
      Value *AIOperand = ASC->getPointerOperand();
      Value *Replacement =
          getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand);
      ASC->eraseFromParent();
    } else
  }
}

void AMDGPUSwLowerLDS::poisonRedzones(Function *Func, Value *MallocPtr) {
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  FunctionCallee AsanPoisonRegion = M.getOrInsertFunction(
      "__asan_poison_region",

  auto RedzonesVec = LDSParams.RedzoneOffsetAndSizeVector;
  size_t VecSize = RedzonesVec.size();
  for (unsigned i = 0; i < VecSize; i++) {
    auto &RedzonePair = RedzonesVec[i];
    uint64_t RedzoneOffset = RedzonePair.first;
    uint64_t RedzoneSize = RedzonePair.second;
        IRB.getInt8Ty(), MallocPtr, {IRB.getInt64(RedzoneOffset)});
        {RedzoneAddress, IRB.getInt64(RedzoneSize)});
  }
}
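// Buffer layout recap (inferred from populateSwMetadataGlobal above): each LDS
// variable occupies [Offset, Offset + Size) in the malloc'ed block and is
// immediately followed by a right redzone of getRedzoneSizeForGlobal() bytes;
// RedzoneOffsetAndSizeVector records exactly those (redzone start, redzone
// size) pairs.  __asan_poison_region marks each such range as inaccessible in
// ASan's shadow, so an access that runs off the end of one variable is
// reported instead of silently landing in its neighbour.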

void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
  LLVM_DEBUG(dbgs() << "Sw Lowering Kernel LDS for : " << Func->getName());
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
  auto &Ctx = M.getContext();
  auto *PrevEntryBlock = &Func->getEntryBlock();
  getLDSMemoryInstructions(Func, LDSInstructions);

  auto *MallocBlock = BasicBlock::Create(Ctx, "Malloc", Func, PrevEntryBlock);

  IRB.CreateCondBr(WIdzCond, MallocBlock, PrevEntryBlock);

  IRB.SetInsertPoint(MallocBlock, MallocBlock->begin());

  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
  assert(SwLDS && SwLDSMetadata);
  Value *CurrMallocSize;

    for (auto &GV : LDSGlobals) {
        continue;
      UniqueLDSGlobals.insert(GV);
    }
  };

  GetUniqueLDSGlobals(LDSParams.DirectAccess.StaticLDSGlobals);
  GetUniqueLDSGlobals(LDSParams.IndirectAccess.StaticLDSGlobals);
  unsigned NumStaticLDS = 1 + UniqueLDSGlobals.size();
  UniqueLDSGlobals.clear();

  if (NumStaticLDS) {
    auto *GEPForEndStaticLDSOffset =
                              {ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, NumStaticLDS - 1),
                               ConstantInt::get(Int32Ty, 0)});

    auto *GEPForEndStaticLDSSize =
                              {ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, NumStaticLDS - 1),
                               ConstantInt::get(Int32Ty, 2)});

    Value *EndStaticLDSOffset =
    CurrMallocSize = IRB.CreateAdd(EndStaticLDSOffset, EndStaticLDSSize);
  } else
    CurrMallocSize = IRB.getInt32(MallocSize);

  if (LDSParams.SwDynLDS) {
        "Dynamic LDS size query is only supported for CO V5 and later.");
        IRB.CreateIntrinsic(Intrinsic::amdgcn_implicitarg_ptr, {});
        {ConstantInt::get(Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
    UniqueLDSGlobals.clear();
    GetUniqueLDSGlobals(LDSParams.DirectAccess.DynamicLDSGlobals);
    GetUniqueLDSGlobals(LDSParams.IndirectAccess.DynamicLDSGlobals);
    updateMallocSizeForDynamicLDS(Func, &CurrMallocSize, HiddenDynLDSSize,
                                  UniqueLDSGlobals);
  }

  CurrMallocSize = IRB.CreateZExt(CurrMallocSize, Int64Ty);

  Value *ReturnAddress =
  Value *MallocCall = IRB.CreateCall(MallocFunc, {CurrMallocSize, RAPtrToInt});

  Value *MallocPtr =

  poisonRedzones(Func, MallocPtr);

  IRB.CreateBr(PrevEntryBlock);

  IRB.SetInsertPoint(PrevEntryBlock, PrevEntryBlock->begin());
  auto *XYZCondPhi = IRB.CreatePHI(Int1Ty, 2, "xyzCond");
  XYZCondPhi->addIncoming(IRB.getInt1(1), MallocBlock);

  Value *LoadMallocPtr =

  replaceKernelLDSAccesses(Func);

  translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
                                             LDSInstructions);

    if (!BB.empty()) {
        RI->eraseFromParent();
      }
    }
  }

  IRB.SetInsertPoint(CondFreeBlock, CondFreeBlock->begin());
  IRB.CreateCondBr(XYZCondPhi, FreeBlock, EndBlock);

  Value *ReturnAddr =
  IRB.CreateCall(AsanFreeFunc, {MallocPtrToInt, RAPToInt});
}
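// Net effect of lowerKernelLDSAccesses, as far as the surviving lines show
// (summary, not original file comments): a new "Malloc" block is entered only
// by the work-item whose x/y/z ids are all zero.  That block totals up the
// static LDS, the per-variable redzones, and any dynamic LDS size read from
// the COV5 hidden kernel argument, calls the ASan allocation routine with the
// kernel's return address, keeps the returned pointer where the rest of the
// kernel can reload it (LoadMallocPtr), poisons the redzones, and branches
// back to the original entry block.  The "xyzCond" phi remembers which lane
// allocated, and every return in the kernel is rerouted through
// CondFreeBlock/FreeBlock so the same lane hands the buffer back to ASan
// (AsanFreeFunc) on exit.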

Constant *AMDGPUSwLowerLDS::getAddressesOfVariablesInKernel(
  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];

  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
  assert(SwLDSMetadata);
  auto *SwLDSMetadataStructType =

  for (auto *GV : Variables) {
    auto It = LDSParams.LDSToReplacementIndicesMap.find(GV);
    if (It == LDSParams.LDSToReplacementIndicesMap.end()) {
      Elements.push_back(
      continue;
    }
    auto &Indices = It->second;
    Constant *GEPIdx[] = {ConstantInt::get(Int32Ty, Indices[0]),
                          ConstantInt::get(Int32Ty, Indices[1]),
                          ConstantInt::get(Int32Ty, Indices[2])};
                                           SwLDSMetadata, GEPIdx, true);
    Elements.push_back(GEP);
  }
}

void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable(
    NonKernelLDSParameters &NKLDSParams) {
  auto &Kernels = NKLDSParams.OrderedKernels;
  if (Kernels.empty())
    return;
  const size_t NumberKernels = Kernels.size();
  ArrayType *AllKernelsOffsetsType =
  std::vector<Constant *> OverallConstantExprElts(NumberKernels);
  for (size_t i = 0; i < NumberKernels; i++) {
    auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
    OverallConstantExprElts[i] = GEP;
  }
}

void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
    NonKernelLDSParameters &NKLDSParams) {
  auto &Variables = NKLDSParams.OrdereLDSGlobals;
  auto &Kernels = NKLDSParams.OrderedKernels;
  if (Variables.empty() || Kernels.empty())
    return;
  const size_t NumberVariables = Variables.size();
  const size_t NumberKernels = Kernels.size();

  ArrayType *AllKernelsOffsetsType =
  std::vector<Constant *> overallConstantExprElts(NumberKernels);
  for (size_t i = 0; i < NumberKernels; i++) {
    Function *Func = Kernels[i];
    overallConstantExprElts[i] =
        getAddressesOfVariablesInKernel(Func, Variables);
  }
}

void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
    NonKernelLDSParameters &NKLDSParams) {
  LLVM_DEBUG(dbgs() << "Sw LDS lowering, lower non-kernel access for : "
                    << Func->getName());
  auto InsertAt = Func->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();

  getLDSMemoryInstructions(Func, LDSInstructions);

  auto *KernelId = IRB.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
  GlobalVariable *LDSBaseTable = NKLDSParams.LDSBaseTable;
  GlobalVariable *LDSOffsetTable = NKLDSParams.LDSOffsetTable;
  auto &OrdereLDSGlobals = NKLDSParams.OrdereLDSGlobals;
      LDSBaseTable->getValueType(), LDSBaseTable, {IRB.getInt32(0), KernelId});
  Value *BaseLoad =
  Value *LoadMallocPtr =

    const auto *GVIt = llvm::find(OrdereLDSGlobals, GV);
    assert(GVIt != OrdereLDSGlobals.end());
    uint32_t GVOffset = std::distance(OrdereLDSGlobals.begin(), GVIt);

        LDSOffsetTable->getValueType(), LDSOffsetTable,
        {IRB.getInt32(0), KernelId, IRB.getInt32(GVOffset)});
    Value *OffsetLoad =
    Value *BasePlusOffset =
    LLVM_DEBUG(dbgs() << "Sw LDS Lowering, Replace non-kernel LDS for "
    replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
  }
  translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
                                             LDSInstructions);
}

static void reorderStaticDynamicIndirectLDSSet(KernelLDSParameters &LDSParams) {
  auto &DirectAccess = LDSParams.DirectAccess;
  auto &IndirectAccess = LDSParams.IndirectAccess;
  LDSParams.DirectAccess.StaticLDSGlobals = sortByName(
      std::vector<GlobalVariable *>(DirectAccess.StaticLDSGlobals.begin(),
                                    DirectAccess.StaticLDSGlobals.end()));
  LDSParams.DirectAccess.DynamicLDSGlobals = sortByName(
      std::vector<GlobalVariable *>(DirectAccess.DynamicLDSGlobals.begin(),
                                    DirectAccess.DynamicLDSGlobals.end()));
  LDSParams.IndirectAccess.StaticLDSGlobals = sortByName(
      std::vector<GlobalVariable *>(IndirectAccess.StaticLDSGlobals.begin(),
                                    IndirectAccess.StaticLDSGlobals.end()));
  LDSParams.IndirectAccess.DynamicLDSGlobals = sortByName(
      std::vector<GlobalVariable *>(IndirectAccess.DynamicLDSGlobals.begin(),
                                    IndirectAccess.DynamicLDSGlobals.end()));
}

void AMDGPUSwLowerLDS::initAsanInfo() {
  unsigned LongSize =
  int Scale;
  bool OrShadowOffset;
                            &Offset, &Scale, &OrShadowOffset);
  AsanInfo.Scale = Scale;
  AsanInfo.Offset = Offset;
}
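// Scale and Offset captured here describe AddressSanitizer's shadow mapping
// for the target: every 1 << Scale bytes of application memory map to one
// shadow byte, conventionally at
//
//   ShadowAddr = (Addr >> Scale) + Offset   // OR'd rather than added when
//                                           // OrShadowOffset is set
//
// Caching them in AsanInfo lets getRedzoneSizeForGlobal() and
// instrumentAddress() reuse the same parameters later in the pass.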

static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
  for (auto &K : LDSAccesses) {
    if (!F)
      continue;
    if (F->hasFnAttribute(Attribute::SanitizeAddress))
      return true;
  }
  return false;
}
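// run() drives the whole lowering.  Going by the calls visible below, it:
//  1. collects LDS uses and returns early unless some involved function
//     carries the sanitize_address attribute (hasFnWithSanitizeAddressAttr);
//  2. buckets each kernel's LDS globals into direct/indirect and
//     static/dynamic sets (PopulateKernelStaticDynamicLDS);
//  3. caches the ASan shadow parameters (initAsanInfo);
//  4. for every kernel with LDS, builds the software-LDS globals and metadata
//     and rewrites the kernel (buildSwLDSGlobal .. lowerKernelLDSAccesses);
//  5. builds the base/offset lookup tables and rewrites non-kernel functions
//     that touch LDS (lowerNonKernelLDSAccesses);
//  6. optionally runs ASan instrumentation over the rewritten accesses when
//     -amdgpu-asan-instrument-lds is set.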

bool AMDGPUSwLowerLDS::run() {
  bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
                     hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);

  if (!LowerAllLDS)

  auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
                                            bool DirectAccess) {
    for (auto &K : LDSAccesses) {
      if (!F || K.second.empty())
        continue;

      FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
          {F, KernelLDSParameters()});

      auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[F];
      if (!DirectAccess)
        FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.insert(F);
        if (!DirectAccess) {
            LDSParams.IndirectAccess.DynamicLDSGlobals.insert(GV);
          else
            LDSParams.IndirectAccess.StaticLDSGlobals.insert(GV);
          FuncLDSAccessInfo.AllNonKernelLDSAccess.insert(GV);
        } else {
            LDSParams.DirectAccess.DynamicLDSGlobals.insert(GV);
          else
            LDSParams.DirectAccess.StaticLDSGlobals.insert(GV);
        }
      }
    }
  };

  PopulateKernelStaticDynamicLDS(LDSUsesInfo.direct_access, true);
  PopulateKernelStaticDynamicLDS(LDSUsesInfo.indirect_access, false);

  initAsanInfo();

  for (auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) {
    auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
    if (LDSParams.DirectAccess.StaticLDSGlobals.empty() &&
        LDSParams.DirectAccess.DynamicLDSGlobals.empty() &&
        LDSParams.IndirectAccess.StaticLDSGlobals.empty() &&
        LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) {
    } else {
          CG, Func,
          {"amdgpu-no-workitem-id-x", "amdgpu-no-workitem-id-y",
           "amdgpu-no-workitem-id-z", "amdgpu-no-heap-ptr"});
      if (!LDSParams.IndirectAccess.StaticLDSGlobals.empty() ||
          !LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
      reorderStaticDynamicIndirectLDSSet(LDSParams);
      buildSwLDSGlobal(Func);
      buildSwDynLDSGlobal(Func);
      populateSwMetadataGlobal(Func);
      populateSwLDSAttributeAndMetadata(Func);
      populateLDSToReplacementIndicesMap(Func);
                         DomTreeUpdater::UpdateStrategy::Lazy);
      lowerKernelLDSAccesses(Func, DTU);
    }
  }

  getUsesOfLDSByNonKernels();

  getNonKernelsWithLDSArguments(CG);

  if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
      !FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
    NonKernelLDSParameters NKLDSParams;
    NKLDSParams.OrderedKernels = getOrderedIndirectLDSAccessingKernels(
        FuncLDSAccessInfo.KernelsWithIndirectLDSAccess);
    NKLDSParams.OrdereLDSGlobals = getOrderedNonKernelAllLDSGlobals(
        FuncLDSAccessInfo.AllNonKernelLDSAccess);
    buildNonKernelLDSBaseTable(NKLDSParams);
    buildNonKernelLDSOffsetTable(NKLDSParams);
    for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
          std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
      lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
    }
    for (Function *Func : FuncLDSAccessInfo.NonKernelsWithLDSArgument) {
      auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
      if (K.contains(Func))
        continue;
      lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
    }
  }

    }
  }

  if (AsanInstrumentLDS) {
    for (Instruction *Inst : AsanInfo.Instructions) {
    }
    for (auto &Operand : OperandsToInstrument) {
      Value *Addr = Operand.getPtr();
      instrumentAddress(M, IRB, Operand.getInsn(), Operand.getInsn(), Addr,
                        Operand.Alignment.valueOrOne(), Operand.TypeStoreSize,
                        Operand.IsWrite, nullptr, false, false, AsanInfo.Scale,
                        AsanInfo.Offset);
    }
  }
}

class AMDGPUSwLowerLDSLegacy : public ModulePass {
public:
  static char ID;
  bool runOnModule(Module &M) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override {
  }
};
} // namespace

char AMDGPUSwLowerLDSLegacy::ID = 0;

                      "AMDGPU Software lowering of LDS", false, false)

bool AMDGPUSwLowerLDSLegacy::runOnModule(Module &M) {
  if (!M.getModuleFlag("nosanitize_address"))
    return false;
    getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    return DTW ? &DTW->getDomTree() : nullptr;
  };
  if (!AMDGPUTM) {
    auto &TPC = getAnalysis<TargetPassConfig>();
  }
  AMDGPUSwLowerLDS SwLowerLDSImpl(M, *AMDGPUTM, DTCallback);
  bool IsChanged = SwLowerLDSImpl.run();
  return IsChanged;
}

ModulePass *
  return new AMDGPUSwLowerLDSLegacy(TM);
}

  if (!M.getModuleFlag("nosanitize_address"))
  };
  AMDGPUSwLowerLDS SwLowerLDSImpl(M, TM, DTCallback);
  bool IsChanged = SwLowerLDSImpl.run();
  if (!IsChanged)
  return PA;
}
