LLVM: lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

197#include "llvm/IR/IntrinsicsAMDGPU.h"

209

210#include

211

212#include

213

214#define DEBUG_TYPE "amdgpu-lower-module-lds"

215

216using namespace llvm;

217using namespace AMDGPU;

218

219namespace {

220

222 "amdgpu-super-align-lds-globals",

223 cl::desc("Increase alignment of LDS if it is not on align boundary"),

225

// Strategy selected by the "amdgpu-lower-module-lds-strategy" option for
// lowering LDS accesses made from functions:
//   module - lower via the module struct
//   table  - lower via table lookup
//   kernel - lower variables reachable from one kernel, otherwise abort
//   hybrid - lower via a mixture of the above strategies (the option default)
226enum class LoweringKind { module, table, kernel, hybrid };

228 "amdgpu-lower-module-lds-strategy",

229 cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,

230 cl::init(LoweringKind::hybrid),

232 clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),

233 clEnumValN(LoweringKind::module, "module", "Lower via module struct"),

235 LoweringKind::kernel, "kernel",

236 "Lower variables reachable from one kernel, otherwise abort"),

237 clEnumValN(LoweringKind::hybrid, "hybrid",

238 "Lower via mixture of above strategies")));

239

240template std::vector sortByName(std::vector &&V) {

241 llvm::sort(V, [](const auto *L, const auto *R) {

242 return L->getName() < R->getName();

243 });

244 return {std::move(V)};

245}

246

247class AMDGPULowerModuleLDS {

249

250 static void

251 removeLocalVarsFromUsedLists(Module &M,

253

254

258

260 M, [&LocalVarsSet](Constant *C) { return LocalVarsSet.count(C); });

261

263 LocalVar->removeDeadConstantUsers();

264 }

265

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287 BasicBlock *Entry = &Func->getEntryBlock();

288 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());

289

291 Func->getParent(), Intrinsic::donothing, {});

292

293 Value *UseInstance[1] = {

294 Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};

295

296 Builder.CreateCall(

298 }

299

300public:

302

303 struct LDSVariableReplacement {

306 };

307

308

309

310

311

312

313 static Constant *getAddressesOfVariablesInKernel(

316

317

318

319

320

322

324

327 auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);

328 if (ConstantGepIt != LDSVarsToConstantGEP.end()) {

330 Elements.push_back(elt);

331 } else {

333 }

334 }

336 }

337

342 if (Variables.empty()) {

343 return nullptr;

344 }

346

347 const size_t NumberVariables = Variables.size();

348 const size_t NumberKernels = kernels.size();

349

352

353 ArrayType *AllKernelsOffsetsType =

355

357 std::vector<Constant *> overallConstantExprElts(NumberKernels);

358 for (size_t i = 0; i < NumberKernels; i++) {

359 auto Replacement = KernelToReplacement.find(kernels[i]);

360 overallConstantExprElts[i] =

361 (Replacement == KernelToReplacement.end())

362 ? Missing

363 : getAddressesOfVariablesInKernel(

364 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);

365 }

366

369

374 }

375

379 Value *OptionalIndex) {

380

384

385 Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());

386

388 BasicBlock *BB = Phi->getIncomingBlock(U);

390 } else {

391 Builder.SetInsertPoint(I);

392 }

393

395 ConstantInt::get(I32, 0),

396 tableKernelIndex,

397 };

398 if (OptionalIndex)

400

401 Value *Address = Builder.CreateInBoundsGEP(

402 LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());

403

404 Value *loaded = Builder.CreateLoad(I32, Address);

405

406 Value *replacement =

407 Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());

408

409 U.set(replacement);

410 }

411

412 void replaceUsesInInstructionsWithTableLookup(

415

419

420 for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {

421 auto *GV = ModuleScopeVariables[Index];

422

425 if (I)

426 continue;

427

428 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,

429 ConstantInt::get(I32, Index));

430 }

431 }

432 }

433

437

439

440 if (VariableSet.empty())

441 return KernelSet;

442

443 for (Function &Func : M.functions()) {

444 if (Func.isDeclaration() || isKernel(Func))

445 continue;

447 if (VariableSet.contains(GV)) {

448 KernelSet.insert(&Func);

449 break;

450 }

451 }

452 }

453

454 return KernelSet;

455 }

456

458 chooseBestVariableForModuleStrategy(const DataLayout &DL,

460

461

462 struct CandidateTy {

464 size_t UserCount = 0;

465 size_t Size = 0;

466

467 CandidateTy() = default;

468

470 : GV(GV), UserCount(UserCount), Size(AllocSize) {}

471

473

474 if (UserCount < Other.UserCount) {

475 return true;

476 }

477 if (UserCount > Other.UserCount) {

478 return false;

479 }

480

481

483 return false;

484 }

485

487 return true;

488 }

489

490

492 }

493 };

494

495 CandidateTy MostUsed;

496

497 for (auto &K : LDSVars) {

499 if (K.second.size() <= 1) {

500

501

502 continue;

503 }

504 CandidateTy Candidate(

505 GV, K.second.size(),

506 DL.getTypeAllocSize(GV->getValueType()).getFixedValue());

507 if (MostUsed < Candidate)

508 MostUsed = Candidate;

509 }

510

511 return MostUsed.GV;

512 }

513

516

517

519 auto *IntTy =

523 GV->setMetadata(LLVMContext::MD_absolute_symbol,

525 }

526

529

530

531

532 auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);

533 if (Inserted) {

534 auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();

536

537 It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});

538 }

539

540 return It->second;

541 }

542

543 static std::vector<Function *> assignLDSKernelIDToEachKernel(

546

547

548

549

550

551 std::vector<Function *> OrderedKernels;

552 if (!KernelsThatAllocateTableLDS.empty() ||

553 !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

554

555 for (Function &Func : M->functions()) {

556 if (Func.isDeclaration())

557 continue;

559 continue;

560

561 if (KernelsThatAllocateTableLDS.contains(&Func) ||

562 KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {

563 assert(Func.hasName());

564 OrderedKernels.push_back(&Func);

565 }

566 }

567

568

569 OrderedKernels = sortByName(std::move(OrderedKernels));

570

571

574

575 if (OrderedKernels.size() > UINT32_MAX) {

576

578 }

579

580 for (size_t i = 0; i < OrderedKernels.size(); i++) {

583 };

584 OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",

586 }

587 }

588 return OrderedKernels;

589 }

590

591 static void partitionVariablesIntoIndirectStrategies(

598

600 LoweringKindLoc != LoweringKind::hybrid

601 ? nullptr

602 : chooseBestVariableForModuleStrategy(

603 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);

604

607 HybridModuleRoot

608 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]

609 : EmptySet;

610

611 for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {

612

613

614

617 assert(K.second.size() != 0);

618

620 DynamicVariables.insert(GV);

621 continue;

622 }

623

624 switch (LoweringKindLoc) {

625 case LoweringKind::module:

626 ModuleScopeVariables.insert(GV);

627 break;

628

629 case LoweringKind::table:

630 TableLookupVariables.insert(GV);

631 break;

632

633 case LoweringKind::kernel:

634 if (K.second.size() == 1) {

635 KernelAccessVariables.insert(GV);

636 } else {

637

639 "cannot lower LDS '" + GV->getName() +

640 "' to kernel access as it is reachable from multiple kernels");

641 }

642 break;

643

644 case LoweringKind::hybrid: {

645 if (GV == HybridModuleRoot) {

646 assert(K.second.size() != 1);

647 ModuleScopeVariables.insert(GV);

648 } else if (K.second.size() == 1) {

649 KernelAccessVariables.insert(GV);

650 } else if (K.second == HybridModuleRootKernels) {

651 ModuleScopeVariables.insert(GV);

652 } else {

653 TableLookupVariables.insert(GV);

654 }

655 break;

656 }

657 }

658 }

659

660

661

662 assert(ModuleScopeVariables.size() + TableLookupVariables.size() +

663 KernelAccessVariables.size() + DynamicVariables.size() ==

664 LDSToKernelsThatNeedToAccessItIndirectly.size());

665 }

666

667 static GlobalVariable *lowerModuleScopeStructVariables(

670

671

672

673

674

675

676

677 if (ModuleScopeVariables.empty()) {

678 return nullptr;

679 }

680

682

683 LDSVariableReplacement ModuleScopeReplacement =

684 createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",

685 ModuleScopeVariables);

686

691

692

693 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);

694

695

696 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);

697

698

699 replaceLDSVariablesWithStruct(

700 M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

702 if (I) {

703 return false;

704 }

707 });

708

709

710

711

712

713 for (Function &Func : M.functions()) {

714 if (Func.isDeclaration() || isKernel(Func))

715 continue;

716

717 if (KernelsThatAllocateModuleLDS.contains(&Func)) {

718 replaceLDSVariablesWithStruct(

719 M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

721 if (I) {

722 return false;

723 }

725 return F == &Func;

726 });

727

728 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);

729 }

730 }

731

732 return ModuleScopeReplacement.SGV;

733 }

734

736 lowerKernelScopeStructVariables(

741

742

743

745 for (Function &Func : M.functions()) {

746 if (Func.isDeclaration() || isKernel(Func))

747 continue;

748

750

751

752 for (auto &v : LDSUsesInfo.direct_access[&Func]) {

754 KernelUsedVariables.insert(v);

755 }

756 }

757

758

759

762 KernelUsedVariables.insert(v);

763 }

764 }

765

766

767

768 if (KernelsThatAllocateModuleLDS.contains(&Func)) {

770 KernelUsedVariables.erase(v);

771 }

772 }

773

774 if (KernelUsedVariables.empty()) {

775

776

777 continue;

778 }

779

780

781

782

783

784

785

786 if (!Func.hasName()) {

788 }

789

790 std::string VarName =

791 (Twine("llvm.amdgcn.kernel.") + Func.getName() + ".lds").str();

792

793 auto Replacement =

794 createLDSVariableReplacement(M, VarName, KernelUsedVariables);

795

796

797

798

802 markUsedByKernel(&Func, Replacement.SGV);

803

804

805 removeLocalVarsFromUsedLists(M, KernelUsedVariables);

806 KernelToReplacement[&Func] = Replacement;

807

808

809 replaceLDSVariablesWithStruct(

810 M, KernelUsedVariables, Replacement, [&Func](Use &U) {

812 return I && I->getFunction() == &Func;

813 });

814 }

815 return KernelToReplacement;

816 }

817

819 buildRepresentativeDynamicLDSInstance(Module &M, LDSUsesInfoTy &LDSUsesInfo,

821

822

823

824

825

826

827

828

829

830

832

835 Align MaxDynamicAlignment(1);

836

837 auto UpdateMaxAlignment = [&MaxDynamicAlignment, &DL](GlobalVariable *GV) {

839 MaxDynamicAlignment =

841 }

842 };

843

845 UpdateMaxAlignment(GV);

846 }

847

849 UpdateMaxAlignment(GV);

850 }

851

857 false);

858 N->setAlignment(MaxDynamicAlignment);

859

861 return N;

862 }

863

868 std::vector<Function *> const &OrderedKernels) {

870 if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

874

875 std::vector<Constant *> newDynamicLDS;

876

877

878 for (auto &func : OrderedKernels) {

879

880 if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {

884 }

885

887 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);

888

889 KernelToCreatedDynamicLDS[func] = N;

890

891 markUsedByKernel(func, N);

892

895 emptyCharArray, N, ConstantInt::get(I32, 0), true);

897 } else {

899 }

900 }

901 assert(OrderedKernels.size() == newDynamicLDS.size());

902

907 "llvm.amdgcn.dynlds.offset.table", nullptr,

909

913 if (I)

914 continue;

916 continue;

917

918 replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);

919 }

920 }

921 }

922 return KernelToCreatedDynamicLDS;

923 }

924

925 bool runOnModule(Module &M) {

927 bool Changed = superAlignLDSGlobals(M);

928

930

931 Changed = true;

932

933

934

936

937

943 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);

944 }

945 }

946

947

952 partitionVariablesIntoIndirectStrategies(

953 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,

954 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,

955 DynamicVariables);

956

957

958

959

961 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

962 ModuleScopeVariables);

964 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

965 TableLookupVariables);

966

968 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

969 DynamicVariables);

970

971 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(

972 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);

973

975 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,

976 KernelsThatAllocateModuleLDS,

977 MaybeModuleScopeStruct);

978

979

980 for (auto &GV : KernelAccessVariables) {

981 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];

982 assert(funcs.size() == 1);

983 LDSVariableReplacement Replacement =

984 KernelToReplacement[*(funcs.begin())];

985

988

989 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {

991 });

992 }

993

994

995 std::vector<Function *> OrderedKernels =

996 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,

997 KernelsThatIndirectlyAllocateDynamicLDS);

998

999 if (!KernelsThatAllocateTableLDS.empty()) {

1002

1003

1004

1005 auto TableLookupVariablesOrdered =

1006 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),

1007 TableLookupVariables.end()));

1008

1010 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);

1011 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,

1012 LookupTable);

1013 }

1014

1016 lowerDynamicLDSVariables(M, LDSUsesInfo,

1017 KernelsThatIndirectlyAllocateDynamicLDS,

1018 DynamicVariables, OrderedKernels);

1019

1020

1021

1022

1023 for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,

1024 &KernelsThatAllocateTableLDS})

1027

1028

1029

1030 {

1032

1033 for (Function &Func : M.functions()) {

1034 if (Func.isDeclaration() || isKernel(Func))

1035 continue;

1036

1037

1038

1039

1040

1041

1042

1043

1044

1045

1046

1047

1048 const bool AllocateModuleScopeStruct =

1049 MaybeModuleScopeStruct &&

1050 KernelsThatAllocateModuleLDS.contains(&Func);

1051

1052 auto Replacement = KernelToReplacement.find(&Func);

1053 const bool AllocateKernelScopeStruct =

1054 Replacement != KernelToReplacement.end();

1055

1056 const bool AllocateDynamicVariable =

1057 KernelToCreatedDynamicLDS.contains(&Func);

1058

1060

1061 if (AllocateModuleScopeStruct) {

1062

1063

1065 }

1066

1067 if (AllocateKernelScopeStruct) {

1068 GlobalVariable *KernelStruct = Replacement->second.SGV;

1070 recordLDSAbsoluteAddress(&M, KernelStruct, Offset);

1072 }

1073

1074

1075

1076

1077

1078 if (AllocateDynamicVariable) {

1079 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];

1081 recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);

1082 }

1083

1085 (void)TM;

1086 std::string Buffer;

1089

1090

1091

1092

1093

1094

1095

1096 if (AllocateDynamicVariable)

1098

1099 Func.addFnAttr("amdgpu-lds-size", Buffer);

1100 }

1101 }

1102 }

1103

1106

1110 }

1111

1113 }

1114

1115private:

1116

1117

1118 static bool superAlignLDSGlobals(Module &M) {

1121 if (!SuperAlignLDSGlobals) {

1123 }

1124

1125 for (auto &GV : M.globals()) {

1127

1128 continue;

1129 }

1131

1132 continue;

1133 }

1134

1136

1137 continue;

1138 }

1139

1142

1143 if (GVSize > 8) {

1144

1145 Alignment = std::max(Alignment, Align(16));

1146 } else if (GVSize > 4) {

1147

1148 Alignment = std::max(Alignment, Align(8));

1149 } else if (GVSize > 2) {

1150

1151 Alignment = std::max(Alignment, Align(4));

1152 } else if (GVSize > 1) {

1153

1154 Alignment = std::max(Alignment, Align(2));

1155 }

1156

1160 }

1161 }

1163 }

1164

1165 static LDSVariableReplacement createLDSVariableReplacement(

1166 Module &M, std::string VarName,

1168

1169

1170

1171

1172

1176

1178 LayoutFields.reserve(LDSVarsToTransform.size());

1179 {

1180

1181

1182

1183 auto Sorted = sortByName(std::vector<GlobalVariable *>(

1184 LDSVarsToTransform.begin(), LDSVarsToTransform.end()));

1185

1191 }

1192 }

1193

1195

1196 std::vector<GlobalVariable *> LocalVars;

1198 LocalVars.reserve(LDSVarsToTransform.size());

1199 IsPaddingField.reserve(LDSVarsToTransform.size());

1200 {

1202 for (auto &F : LayoutFields) {

1204 static_cast<GlobalVariable *>(const_cast<void *>(F.Id));

1205 Align DataAlign = F.Alignment;

1206

1208 if (uint64_t Rem = CurrentOffset % DataAlignV) {

1209 uint64_t Padding = DataAlignV - Rem;

1210

1211

1212

1213

1214

1221 CurrentOffset += Padding;

1222 }

1223

1224 LocalVars.push_back(FGV);

1225 IsPaddingField.push_back(false);

1226 CurrentOffset += F.Size;

1227 }

1228 }

1229

1230 std::vector<Type *> LocalVarTypes;

1231 LocalVarTypes.reserve(LocalVars.size());

1232 std::transform(

1233 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),

1234 [](const GlobalVariable *V) -> Type * { return V->getValueType(); });

1235

1237

1239

1243 false);

1245

1248 for (size_t I = 0; I < LocalVars.size(); I++) {

1250 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};

1252 if (IsPaddingField[I]) {

1255 } else {

1256 Map[GV] = GEP;

1257 }

1258 }

1259 assert(Map.size() == LDSVarsToTransform.size());

1260 return {SGV, std::move(Map)};

1261 }

1262

1263 template

1264 static void replaceLDSVariablesWithStruct(

1266 const LDSVariableReplacement &Replacement, PredicateTy Predicate) {

1269

1270

1271

1272

1273 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(

1274 LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));

1275

1276

1277

1280 const size_t NumberVars = LDSVarsToTransform.size();

1281 if (NumberVars > 1) {

1283 AliasScopes.reserve(NumberVars);

1285 for (size_t I = 0; I < NumberVars; I++) {

1288 }

1289 NoAliasList.append(&AliasScopes[1], AliasScopes.end());

1290 }

1291

1292

1293

1294 for (size_t I = 0; I < NumberVars; I++) {

1296 Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);

1297

1299

1300 APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);

1301 GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);

1303

1306

1307 if (I)

1308 NoAliasList[I - 1] = AliasScopes[I - 1];

1310 NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);

1311 MDNode *AliasScope =

1312 AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});

1313

1314 refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);

1315 }

1316 }

1317

1318 static void refineUsesAlignmentAndAA(Value *Ptr, Align A,

1320 MDNode *NoAlias, unsigned MaxDepth = 5) {

1321 if (!MaxDepth || (A == 1 && !AliasScope))

1322 return;

1323

1325

1328 if (AliasScope && I->mayReadOrWriteMemory()) {

1329 MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);

1331 : AliasScope);

1332 I->setMetadata(LLVMContext::MD_alias_scope, AS);

1333

1334 MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);

1335

1336

1337

1338

1339

1340

1341

1342

1343

1344

1345

1346

1347

1348

1349

1350

1351

1352

1353

1357 auto Intersection = set_intersection(ExistingDomains, LDSDomains);

1358 if (Intersection.empty()) {

1360 } else {

1362 }

1363 I->setMetadata(LLVMContext::MD_noalias, NA);

1364 }

1365 }

1366

1368 LI->setAlignment(std::max(A, LI->getAlign()));

1369 continue;

1370 }

1372 if (SI->getPointerOperand() == Ptr)

1373 SI->setAlignment(std::max(A, SI->getAlign()));

1374 continue;

1375 }

1377

1378

1379 if (AI->getPointerOperand() == Ptr)

1380 AI->setAlignment(std::max(A, AI->getAlign()));

1381 continue;

1382 }

1384 if (AI->getPointerOperand() == Ptr)

1385 AI->setAlignment(std::max(A, AI->getAlign()));

1386 continue;

1387 }

1389 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());

1391 if (GEP->getPointerOperand() == Ptr) {

1393 if (GEP->accumulateConstantOffset(DL, Off))

1395 refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,

1396 MaxDepth - 1);

1397 }

1398 continue;

1399 }

1401 if (I->getOpcode() == Instruction::BitCast ||

1402 I->getOpcode() == Instruction::AddrSpaceCast)

1403 refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);

1404 }

1405 }

1406 }

1407};

1408

1409class AMDGPULowerModuleLDSLegacy : public ModulePass {

1410public:

1412 static char ID;

1413

1416

1417 void getAnalysisUsage(AnalysisUsage &AU) const override {

1418 if (!TM)

1420 }

1421

1422 bool runOnModule(Module &M) override {

1423 if (!TM) {

1424 auto &TPC = getAnalysis();

1426 }

1427

1428 return AMDGPULowerModuleLDS(*TM).runOnModule(M);

1429 }

1430};

1431

1432}

1433char AMDGPULowerModuleLDSLegacy::ID = 0;

1434

1436

1438 "Lower uses of LDS variables from non-kernel functions",

1439 false, false)

1442 "Lower uses of LDS variables from non-kernel functions",

1444

1447 return new AMDGPULowerModuleLDSLegacy(TM);

1448}

1449

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file implements the BitVector class.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

This file contains the declarations for the subclasses of Constant, which represent the different fla...

DXIL Forward Handle Accesses

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This is the interface for a metadata-based scoped no-alias analysis.

This file defines generic set operations that may be used on sets of different types,...

Target-Independent Code Generator Pass Configuration Options pass.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)

This static method is the primary way to construct an ArrayType.

LLVM Basic Block Representation.

LLVM_ABI const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...

The basic data container for the call graph of a Module of IR.

static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)

static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)

Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.

static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)

static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)

Getelementptr form.

This is an important base class in LLVM.

LLVM_ABI void removeDeadConstantUsers() const

If there are any dead constant users dangling off of this constant, remove them.

A parsed version of the target data layout string, and methods for querying it.

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

Implements a dense probed hash-table based set.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set a particular kind of metadata attachment.

LLVM_ABI bool isAbsoluteSymbolRef() const

Returns whether this is a reference to an absolute symbol.

PointerType * getType() const

Global values are always pointers.

@ InternalLinkage

Rename collisions when linking (static functions).

@ ExternalLinkage

Externally visible function.

Type * getValueType() const

bool hasInitializer() const

Definitions have initializers, declarations don't.

LLVM_ABI void eraseFromParent()

eraseFromParent - This method unlinks 'this' from the containing module and deletes it.

void setAlignment(Align Align)

Sets the alignment attribute of the GlobalVariable.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

bool runOnModule(Module &) override

ImmutablePasses are never run.

This is an important class for using LLVM in a threaded context.

MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())

Return metadata appropriate for an alias scope root node.

MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())

Return metadata appropriate for an alias scope domain node.

static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)

static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)

Methods for metadata merging.

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)

ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...

A Module instance is used to store all the information related to an LLVM module.

A container for an operand bundle being viewed as a set of values rather than a set of uses.

static PointerType * getUnqual(Type *ElementType)

This constructs a pointer to an object of the specified type in the default address space (address sp...

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

A simple AA result which uses scoped-noalias metadata to answer queries.

static LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains)

Collect the set of scoped domains relevant to the noalias scopes.

bool insert(const value_type &X)

Insert a new element into the SetVector.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Class to represent struct types.

static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)

This creates an identified struct.

Target-Independent Code Generator Pass Configuration Options.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

iterator_range< user_iterator > users()

LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)

Go through the uses list for this definition and make each use point to "New" if the callback ShouldRep...

iterator_range< use_iterator > uses()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

bool erase(const ValueT &V)

A raw_ostream that writes to an std::string.

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

bool isDynamicLDS(const GlobalVariable &GV)

void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)

Strip FnAttr attribute from any functions where we may have introduced its use.

LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)

bool isLDSVariableToLower(const GlobalVariable &GV)

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)

Align getAlign(const DataLayout &DL, const GlobalVariable *GV)

DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

bool operator<(int64_t V1, const APSInt &V2)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

void sort(IteratorTy Start, IteratorTy End)

char & AMDGPULowerModuleLDSLegacyPassID

Definition AMDGPULowerModuleLDSPass.cpp:1435

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)

set_intersection(A, B) - Return the intersection of A and B (the elements present in both sets).

LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)

Removes global values from the llvm.used and llvm.compiler.used arrays.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

Definition AMDGPULowerModuleLDSPass.cpp:1446

LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)

Adds global values to the llvm.compiler.used list.

LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)

Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition AMDGPULowerModuleLDSPass.cpp:1450

const AMDGPUTargetMachine & TM

FunctionVariableMap direct_access

FunctionVariableMap indirect_access

This struct is a compact representation of a valid (non-zero power of two) alignment.

constexpr uint64_t value() const

This is a hole in the type system and should not be abused.