LLVM: lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

195#include "llvm/IR/IntrinsicsAMDGPU.h"

207

208#include

209

210#include

211

212#define DEBUG_TYPE "amdgpu-lower-module-lds"

213

214using namespace llvm;

215using namespace AMDGPU;

216

217namespace {

218

220 "amdgpu-super-align-lds-globals",

221 cl::desc("Increase alignment of LDS if it is not on align boundary"),

223

224enum class LoweringKind { module, table, kernel, hybrid };

226 "amdgpu-lower-module-lds-strategy",

227 cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,

228 cl::init(LoweringKind::hybrid),

230 clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),

231 clEnumValN(LoweringKind::module, "module", "Lower via module struct"),

233 LoweringKind::kernel, "kernel",

234 "Lower variables reachable from one kernel, otherwise abort"),

235 clEnumValN(LoweringKind::hybrid, "hybrid",

236 "Lower via mixture of above strategies")));

237

238template std::vector sortByName(std::vector &&V) {

239 llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {

240 return L->getName() < R->getName();

241 });

242 return {std::move(V)};

243}

244

245class AMDGPULowerModuleLDS {

247

248 static void

249 removeLocalVarsFromUsedLists(Module &M,

251

252

255 LocalVarsSet.insert(cast(LocalVar->stripPointerCasts()));

256

258 M, [&LocalVarsSet](Constant *C) { return LocalVarsSet.count(C); });

259

261 LocalVar->removeDeadConstantUsers();

262 }

263

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285 BasicBlock *Entry = &Func->getEntryBlock();

286 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());

287

289 Func->getParent(), Intrinsic::donothing, {});

290

291 Value *UseInstance[1] = {

293

296 }

297

298public:

300

301 struct LDSVariableReplacement {

304 };

305

306

307

308

309

310

311 static Constant *getAddressesOfVariablesInKernel(

314

315

316

317

318

320

321 ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size());

322

325 auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);

326 if (ConstantGepIt != LDSVarsToConstantGEP.end()) {

328 Elements.push_back(elt);

329 } else {

331 }

332 }

334 }

335

340 if (Variables.empty()) {

341 return nullptr;

342 }

344

345 const size_t NumberVariables = Variables.size();

346 const size_t NumberKernels = kernels.size();

347

350

351 ArrayType *AllKernelsOffsetsType =

352 ArrayType::get(KernelOffsetsType, NumberKernels);

353

355 std::vector<Constant *> overallConstantExprElts(NumberKernels);

356 for (size_t i = 0; i < NumberKernels; i++) {

357 auto Replacement = KernelToReplacement.find(kernels[i]);

358 overallConstantExprElts[i] =

359 (Replacement == KernelToReplacement.end())

360 ? Missing

361 : getAddressesOfVariablesInKernel(

362 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);

363 }

364

367

372 }

373

377 Value *OptionalIndex) {

378

381 auto *I = cast(U.getUser());

382

383 Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());

384

385 if (auto *Phi = dyn_cast(I)) {

386 BasicBlock *BB = Phi->getIncomingBlock(U);

388 } else {

390 }

391

393 ConstantInt::get(I32, 0),

394 tableKernelIndex,

395 };

396 if (OptionalIndex)

398

400 LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());

401

403

404 Value *replacement =

406

407 U.set(replacement);

408 }

409

410 void replaceUsesInInstructionsWithTableLookup(

413

417

418 for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {

419 auto *GV = ModuleScopeVariables[Index];

420

422 auto *I = dyn_cast(U.getUser());

423 if (I)

424 continue;

425

426 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,

427 ConstantInt::get(I32, Index));

428 }

429 }

430 }

431

435

437

438 if (VariableSet.empty())

439 return KernelSet;

440

441 for (Function &Func : M.functions()) {

442 if (Func.isDeclaration() || isKernelLDS(&Func))

443 continue;

445 if (VariableSet.contains(GV)) {

446 KernelSet.insert(&Func);

447 break;

448 }

449 }

450 }

451

452 return KernelSet;

453 }

454

456 chooseBestVariableForModuleStrategy(const DataLayout &DL,

458

459

460 struct CandidateTy {

462 size_t UserCount = 0;

463 size_t Size = 0;

464

465 CandidateTy() = default;

466

468 : GV(GV), UserCount(UserCount), Size(AllocSize) {}

469

471

472 if (UserCount < Other.UserCount) {

473 return true;

474 }

475 if (UserCount > Other.UserCount) {

476 return false;

477 }

478

479

481 return false;

482 }

483

485 return true;

486 }

487

488

490 }

491 };

492

493 CandidateTy MostUsed;

494

495 for (auto &K : LDSVars) {

497 if (K.second.size() <= 1) {

498

499

500 continue;

501 }

502 CandidateTy Candidate(

503 GV, K.second.size(),

504 DL.getTypeAllocSize(GV->getValueType()).getFixedValue());

505 if (MostUsed < Candidate)

506 MostUsed = Candidate;

507 }

508

509 return MostUsed.GV;

510 }

511

514

515

517 auto *IntTy =

521 GV->setMetadata(LLVMContext::MD_absolute_symbol,

523 }

524

527

528

529

530 auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);

531 if (Inserted) {

532 auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();

534

535 It->second =

536 Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {}, {});

537 }

538

539 return It->second;

540 }

541

542 static std::vector<Function *> assignLDSKernelIDToEachKernel(

545

546

547

548

549

550 std::vector<Function *> OrderedKernels;

551 if (!KernelsThatAllocateTableLDS.empty() ||

552 !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

553

554 for (Function &Func : M->functions()) {

555 if (Func.isDeclaration())

556 continue;

558 continue;

559

560 if (KernelsThatAllocateTableLDS.contains(&Func) ||

561 KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {

562 assert(Func.hasName());

563 OrderedKernels.push_back(&Func);

564 }

565 }

566

567

568 OrderedKernels = sortByName(std::move(OrderedKernels));

569

570

573

574 if (OrderedKernels.size() > UINT32_MAX) {

575

576 report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels");

577 }

578

579 for (size_t i = 0; i < OrderedKernels.size(); i++) {

582 };

583 OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",

585 }

586 }

587 return OrderedKernels;

588 }

589

590 static void partitionVariablesIntoIndirectStrategies(

597

599 LoweringKindLoc != LoweringKind::hybrid

600 ? nullptr

601 : chooseBestVariableForModuleStrategy(

602 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);

603

606 HybridModuleRoot

607 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]

608 : EmptySet;

609

610 for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {

611

612

613

616 assert(K.second.size() != 0);

617

619 DynamicVariables.insert(GV);

620 continue;

621 }

622

623 switch (LoweringKindLoc) {

624 case LoweringKind::module:

625 ModuleScopeVariables.insert(GV);

626 break;

627

628 case LoweringKind::table:

629 TableLookupVariables.insert(GV);

630 break;

631

632 case LoweringKind::kernel:

633 if (K.second.size() == 1) {

634 KernelAccessVariables.insert(GV);

635 } else {

637 "cannot lower LDS '" + GV->getName() +

638 "' to kernel access as it is reachable from multiple kernels");

639 }

640 break;

641

642 case LoweringKind::hybrid: {

643 if (GV == HybridModuleRoot) {

644 assert(K.second.size() != 1);

645 ModuleScopeVariables.insert(GV);

646 } else if (K.second.size() == 1) {

647 KernelAccessVariables.insert(GV);

648 } else if (set_is_subset(K.second, HybridModuleRootKernels)) {

649 ModuleScopeVariables.insert(GV);

650 } else {

651 TableLookupVariables.insert(GV);

652 }

653 break;

654 }

655 }

656 }

657

658

659

660 assert(ModuleScopeVariables.size() + TableLookupVariables.size() +

661 KernelAccessVariables.size() + DynamicVariables.size() ==

662 LDSToKernelsThatNeedToAccessItIndirectly.size());

663 }

664

665 static GlobalVariable *lowerModuleScopeStructVariables(

668

669

670

671

672

673

674

675 if (ModuleScopeVariables.empty()) {

676 return nullptr;

677 }

678

680

681 LDSVariableReplacement ModuleScopeReplacement =

682 createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",

683 ModuleScopeVariables);

684

687 cast(ModuleScopeReplacement.SGV),

688 PointerType::getUnqual(Ctx)))});

689

690

691 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);

692

693

694 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);

695

696

697 replaceLDSVariablesWithStruct(

698 M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

699 Instruction *I = dyn_cast(U.getUser());

700 if (I) {

701 return false;

702 }

705 });

706

707

708

709

710

711 for (Function &Func : M.functions()) {

712 if (Func.isDeclaration() || isKernelLDS(&Func))

713 continue;

714

715 if (KernelsThatAllocateModuleLDS.contains(&Func)) {

716 replaceLDSVariablesWithStruct(

717 M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

718 Instruction *I = dyn_cast(U.getUser());

719 if (I) {

720 return false;

721 }

723 return F == &Func;

724 });

725

726 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);

727 }

728 }

729

730 return ModuleScopeReplacement.SGV;

731 }

732

734 lowerKernelScopeStructVariables(

739

740

741

743 for (Function &Func : M.functions()) {

744 if (Func.isDeclaration() || isKernelLDS(&Func))

745 continue;

746

748

749

750 for (auto &v : LDSUsesInfo.direct_access[&Func]) {

752 KernelUsedVariables.insert(v);

753 }

754 }

755

756

757

760 KernelUsedVariables.insert(v);

761 }

762 }

763

764

765

766 if (KernelsThatAllocateModuleLDS.contains(&Func)) {

768 KernelUsedVariables.erase(v);

769 }

770 }

771

772 if (KernelUsedVariables.empty()) {

773

774

775 continue;

776 }

777

778

779

780

781

782

783

784 if (!Func.hasName()) {

786 }

787

788 std::string VarName =

789 (Twine("llvm.amdgcn.kernel.") + Func.getName() + ".lds").str();

790

791 auto Replacement =

792 createLDSVariableReplacement(M, VarName, KernelUsedVariables);

793

794

795

796

799 !Accesses->second.empty())

800 markUsedByKernel(&Func, Replacement.SGV);

801

802

803 removeLocalVarsFromUsedLists(M, KernelUsedVariables);

804 KernelToReplacement[&Func] = Replacement;

805

806

807 replaceLDSVariablesWithStruct(

808 M, KernelUsedVariables, Replacement, [&Func](Use &U) {

809 Instruction *I = dyn_cast(U.getUser());

810 return I && I->getFunction() == &Func;

811 });

812 }

813 return KernelToReplacement;

814 }

815

817 buildRepresentativeDynamicLDSInstance(Module &M, LDSUsesInfoTy &LDSUsesInfo,

819

820

821

822

823

824

825

826

827

828

830

833 Align MaxDynamicAlignment(1);

834

835 auto UpdateMaxAlignment = [&MaxDynamicAlignment, &DL](GlobalVariable *GV) {

837 MaxDynamicAlignment =

839 }

840 };

841

843 UpdateMaxAlignment(GV);

844 }

845

847 UpdateMaxAlignment(GV);

848 }

849

850 assert(func->hasName());

851 auto *emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);

855 false);

856 N->setAlignment(MaxDynamicAlignment);

857

859 return N;

860 }

861

866 std::vector<Function *> const &OrderedKernels) {

868 if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

872

873 std::vector<Constant *> newDynamicLDS;

874

875

876 for (auto &func : OrderedKernels) {

877

878 if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {

880 if (func->hasName()) {

882 }

883

885 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);

886

887 KernelToCreatedDynamicLDS[func] = N;

888

889 markUsedByKernel(func, N);

890

891 auto *emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);

893 emptyCharArray, N, ConstantInt::get(I32, 0), true);

895 } else {

897 }

898 }

899 assert(OrderedKernels.size() == newDynamicLDS.size());

900

901 ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());

905 "llvm.amdgcn.dynlds.offset.table", nullptr,

907

910 auto *I = dyn_cast(U.getUser());

911 if (I)

912 continue;

914 continue;

915

916 replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);

917 }

918 }

919 }

920 return KernelToCreatedDynamicLDS;

921 }

922

925 bool NeedsReplacement = false;

926 for (Use &U : GV->uses()) {

927 if (auto *I = dyn_cast(U.getUser())) {

930 NeedsReplacement = true;

931 break;

932 }

933 }

934 }

935 if (!NeedsReplacement)

936 return GV;

937

944 if (auto *I = dyn_cast(U.getUser())) {

947 U.getUser()->replaceUsesOfWith(GV, NewGV);

948 }

949 }

950 }

951 return NewGV;

952 }

953

954 bool lowerSpecialLDSVariables(

957 bool Changed = false;

958

959 int NumAbsolutes = 0;

960 std::vector<GlobalVariable *> OrderedGVs;

961 for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {

964 continue;

965

966

967

968 if (LDSToKernelsThatNeedToAccessItIndirectly[GV].size() > 1) {

969 OrderedGVs.push_back(GV);

970 } else {

971

972

974 }

975 LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);

976 }

977 OrderedGVs = sortByName(std::move(OrderedGVs));

979 int BarId = ++NumAbsolutes;

981

982

983 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;

984 recordLDSAbsoluteAddress(&M, GV, Offset);

985 }

986 OrderedGVs.clear();

987

988

989

990

991 std::vector<Function *> OrderedKernels;

995 OrderedKernels.push_back(F);

996 }

997 OrderedKernels = sortByName(std::move(OrderedKernels));

998

1000 for (Function *F : OrderedKernels) {

1003 continue;

1004

1007

1008 continue;

1009 }

1010 OrderedGVs.push_back(GV);

1011 }

1012 OrderedGVs = sortByName(std::move(OrderedGVs));

1014

1015

1016 auto NewGV = uniquifyGVPerKernel(M, GV, F);

1017 Changed |= (NewGV != GV);

1018 int BarId = (NumAbsolutes + 1);

1019 if (Kernel2BarId.find(F) != Kernel2BarId.end()) {

1020 BarId = (Kernel2BarId[F] + 1);

1021 }

1022 Kernel2BarId[F] = BarId;

1024 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;

1025 recordLDSAbsoluteAddress(&M, NewGV, Offset);

1026 }

1027 OrderedGVs.clear();

1028 }

1029

1034 K.second.erase(GV);

1035 }

1036 }

1037 return Changed;

1038 }

1039

1040 bool runOnModule(Module &M) {

1042 bool Changed = superAlignLDSGlobals(M);

1043

1045

1046 Changed = true;

1047

1048

1049

1051

1052

1058 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);

1059 }

1060 }

1061

1063

1064 Changed |= lowerSpecialLDSVariables(

1065 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);

1066 }

1067

1068

1073 partitionVariablesIntoIndirectStrategies(

1074 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,

1075 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,

1076 DynamicVariables);

1077

1078

1079

1080

1082 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

1083 ModuleScopeVariables);

1085 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

1086 TableLookupVariables);

1087

1089 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,

1090 DynamicVariables);

1091

1092 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(

1093 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);

1094

1096 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,

1097 KernelsThatAllocateModuleLDS,

1098 MaybeModuleScopeStruct);

1099

1100

1101 for (auto &GV : KernelAccessVariables) {

1102 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];

1103 assert(funcs.size() == 1);

1104 LDSVariableReplacement Replacement =

1105 KernelToReplacement[*(funcs.begin())];

1106

1109

1110 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {

1111 return isa(U.getUser());

1112 });

1113 }

1114

1115

1116 std::vector<Function *> OrderedKernels =

1117 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,

1118 KernelsThatIndirectlyAllocateDynamicLDS);

1119

1120 if (!KernelsThatAllocateTableLDS.empty()) {

1123

1124

1125

1126 auto TableLookupVariablesOrdered =

1127 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),

1128 TableLookupVariables.end()));

1129

1131 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);

1132 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,

1133 LookupTable);

1134 }

1135

1137 lowerDynamicLDSVariables(M, LDSUsesInfo,

1138 KernelsThatIndirectlyAllocateDynamicLDS,

1139 DynamicVariables, OrderedKernels);

1140

1141

1142

1143

1144 for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,

1145 &KernelsThatAllocateTableLDS})

1148

1149

1150

1151 {

1153

1154 for (Function &Func : M.functions()) {

1155 if (Func.isDeclaration() || isKernelLDS(&Func))

1156 continue;

1157

1158

1159

1160

1161

1162

1163

1164

1165

1166

1167

1168

1169 const bool AllocateModuleScopeStruct =

1170 MaybeModuleScopeStruct &&

1171 KernelsThatAllocateModuleLDS.contains(&Func);

1172

1173 auto Replacement = KernelToReplacement.find(&Func);

1174 const bool AllocateKernelScopeStruct =

1175 Replacement != KernelToReplacement.end();

1176

1177 const bool AllocateDynamicVariable =

1178 KernelToCreatedDynamicLDS.contains(&Func);

1179

1181

1182 if (AllocateModuleScopeStruct) {

1183

1184

1186 }

1187

1188 if (AllocateKernelScopeStruct) {

1189 GlobalVariable *KernelStruct = Replacement->second.SGV;

1191 recordLDSAbsoluteAddress(&M, KernelStruct, Offset);

1193 }

1194

1195

1196

1197

1198

1199 if (AllocateDynamicVariable) {

1200 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];

1202 recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);

1203 }

1204

1206 (void)TM;

1207 std::string Buffer;

1210

1211

1212

1213

1214

1215

1216

1217 if (AllocateDynamicVariable)

1219

1220 Func.addFnAttr("amdgpu-lds-size", Buffer);

1221 }

1222 }

1223 }

1224

1227

1231 }

1232

1233 return Changed;

1234 }

1235

1236private:

1237

1238

1239 static bool superAlignLDSGlobals(Module &M) {

1241 bool Changed = false;

1242 if (!SuperAlignLDSGlobals) {

1243 return Changed;

1244 }

1245

1246 for (auto &GV : M.globals()) {

1248

1249 continue;

1250 }

1252

1253 continue;

1254 }

1255

1257

1258 continue;

1259 }

1260

1263

1264 if (GVSize > 8) {

1265

1266 Alignment = std::max(Alignment, Align(16));

1267 } else if (GVSize > 4) {

1268

1269 Alignment = std::max(Alignment, Align(8));

1270 } else if (GVSize > 2) {

1271

1272 Alignment = std::max(Alignment, Align(4));

1273 } else if (GVSize > 1) {

1274

1275 Alignment = std::max(Alignment, Align(2));

1276 }

1277

1279 Changed = true;

1281 }

1282 }

1283 return Changed;

1284 }

1285

1286 static LDSVariableReplacement createLDSVariableReplacement(

1287 Module &M, std::string VarName,

1289

1290

1291

1292

1293

1297

1299 LayoutFields.reserve(LDSVarsToTransform.size());

1300 {

1301

1302

1303

1304 auto Sorted = sortByName(std::vector<GlobalVariable *>(

1305 LDSVarsToTransform.begin(), LDSVarsToTransform.end()));

1306

1312 }

1313 }

1314

1316

1317 std::vector<GlobalVariable *> LocalVars;

1319 LocalVars.reserve(LDSVarsToTransform.size());

1320 IsPaddingField.reserve(LDSVarsToTransform.size());

1321 {

1323 for (auto &F : LayoutFields) {

1325 static_cast<GlobalVariable *>(const_cast<void *>(F.Id));

1326 Align DataAlign = F.Alignment;

1327

1329 if (uint64_t Rem = CurrentOffset % DataAlignV) {

1330 uint64_t Padding = DataAlignV - Rem;

1331

1332

1333

1334

1335

1342 CurrentOffset += Padding;

1343 }

1344

1345 LocalVars.push_back(FGV);

1346 IsPaddingField.push_back(false);

1347 CurrentOffset += F.Size;

1348 }

1349 }

1350

1351 std::vector<Type *> LocalVarTypes;

1352 LocalVarTypes.reserve(LocalVars.size());

1353 std::transform(

1354 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),

1355 [](const GlobalVariable *V) -> Type * { return V->getValueType(); });

1356

1358

1360

1364 false);

1366

1369 for (size_t I = 0; I < LocalVars.size(); I++) {

1371 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};

1373 if (IsPaddingField[I]) {

1376 } else {

1377 Map[GV] = GEP;

1378 }

1379 }

1380 assert(Map.size() == LDSVarsToTransform.size());

1381 return {SGV, std::move(Map)};

1382 }

1383

1384 template

1385 static void replaceLDSVariablesWithStruct(

1387 const LDSVariableReplacement &Replacement, PredicateTy Predicate) {

1390

1391

1392

1393

1394 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(

1395 LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));

1396

1397

1398

1401 const size_t NumberVars = LDSVarsToTransform.size();

1402 if (NumberVars > 1) {

1404 AliasScopes.reserve(NumberVars);

1406 for (size_t I = 0; I < NumberVars; I++) {

1409 }

1410 NoAliasList.append(&AliasScopes[1], AliasScopes.end());

1411 }

1412

1413

1414

1415 for (size_t I = 0; I < NumberVars; I++) {

1417 Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);

1418

1420

1421 APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);

1422 GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);

1424

1427

1428 if (I)

1429 NoAliasList[I - 1] = AliasScopes[I - 1];

1431 NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);

1432 MDNode *AliasScope =

1433 AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});

1434

1435 refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);

1436 }

1437 }

1438

1439 static void refineUsesAlignmentAndAA(Value *Ptr, Align A,

1441 MDNode *NoAlias, unsigned MaxDepth = 5) {

1442 if (!MaxDepth || (A == 1 && !AliasScope))

1443 return;

1444

1445 for (User *U : Ptr->users()) {

1446 if (auto *I = dyn_cast(U)) {

1447 if (AliasScope && I->mayReadOrWriteMemory()) {

1448 MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);

1450 : AliasScope);

1451 I->setMetadata(LLVMContext::MD_alias_scope, AS);

1452

1453 MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);

1455 I->setMetadata(LLVMContext::MD_noalias, NA);

1456 }

1457 }

1458

1459 if (auto *LI = dyn_cast(U)) {

1460 LI->setAlignment(std::max(A, LI->getAlign()));

1461 continue;

1462 }

1463 if (auto *SI = dyn_cast(U)) {

1464 if (SI->getPointerOperand() == Ptr)

1465 SI->setAlignment(std::max(A, SI->getAlign()));

1466 continue;

1467 }

1468 if (auto *AI = dyn_cast(U)) {

1469

1470

1471 if (AI->getPointerOperand() == Ptr)

1472 AI->setAlignment(std::max(A, AI->getAlign()));

1473 continue;

1474 }

1475 if (auto *AI = dyn_cast(U)) {

1476 if (AI->getPointerOperand() == Ptr)

1477 AI->setAlignment(std::max(A, AI->getAlign()));

1478 continue;

1479 }

1480 if (auto *GEP = dyn_cast(U)) {

1481 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());

1483 if (GEP->getPointerOperand() == Ptr) {

1485 if (GEP->accumulateConstantOffset(DL, Off))

1487 refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,

1488 MaxDepth - 1);

1489 }

1490 continue;

1491 }

1492 if (auto *I = dyn_cast(U)) {

1493 if (I->getOpcode() == Instruction::BitCast ||

1494 I->getOpcode() == Instruction::AddrSpaceCast)

1495 refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);

1496 }

1497 }

1498 }

1499};

1500

1501class AMDGPULowerModuleLDSLegacy : public ModulePass {

1502public:

1504 static char ID;

1505

1509 }

1510

1512 if (!TM)

1514 }

1515

1517 if (!TM) {

1518 auto &TPC = getAnalysis();

1520 }

1521

1522 return AMDGPULowerModuleLDS(*TM).runOnModule(M);

1523 }

1524};

1525

1526}

1527char AMDGPULowerModuleLDSLegacy::ID = 0;

1528

1530

1532 "Lower uses of LDS variables from non-kernel functions",

1533 false, false)

1538

1541 return new AMDGPULowerModuleLDSLegacy(TM);

1542}

1543

1548}

Lower uses of LDS variables from non-kernel functions

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file implements the BitVector class.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

This file contains the declarations for the subclasses of Constant, which represent the different fla...

Given that RA is a live value, propagate its liveness to any other values it uses (according to Uses). void DeadArgumentEliminationPass

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

std::optional< std::vector< StOtherPiece > > Other

This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file defines generic set operations that may be used on sets of different types,...

Target-Independent Code Generator Pass Configuration Options pass.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

A container for analyses that lazily runs them and caches their results.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

LLVM Basic Block Representation.

const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...

The basic data container for the call graph of a Module of IR.

static Constant * get(ArrayType *T, ArrayRef< Constant * > V)

static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)

Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.

static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)

static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)

Getelementptr form.

This is an important base class in LLVM.

void removeDeadConstantUsers() const

If there are any dead constant users dangling off of this constant, remove them.

A parsed version of the target data layout string, and methods for querying it.

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Implements a dense probed hash-table based set.

void setMetadata(unsigned KindID, MDNode *Node)

Set a particular kind of metadata attachment.

void setAlignment(Align Align)

Sets the alignment attribute of the GlobalObject.

LinkageTypes getLinkage() const

bool isAbsoluteSymbolRef() const

Returns whether this is a reference to an absolute symbol.

ThreadLocalMode getThreadLocalMode() const

PointerType * getType() const

Global values are always pointers.

@ InternalLinkage

Rename collisions when linking (static functions).

@ ExternalLinkage

Externally visible function.

Type * getValueType() const

const Constant * getInitializer() const

getInitializer - Return the initializer for this global variable.

bool hasInitializer() const

Definitions have initializers, declarations don't.

void copyAttributesFrom(const GlobalVariable *Src)

copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...

bool isConstant() const

If the value is a global constant, its value is immutable throughout the runtime execution of the pro...

void eraseFromParent()

eraseFromParent - This method unlinks 'this' from the containing module and deletes it.

Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")

Value * CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")

Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")

CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

ConstantInt * getInt32(uint32_t C)

Get a constant 32-bit value.

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

This is an important class for using LLVM in a threaded context.

MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())

Return metadata appropriate for an alias scope root node.

MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())

Return metadata appropriate for an alias scope domain node.

static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

static MDNode * intersect(MDNode *A, MDNode *B)

ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...

virtual bool runOnModule(Module &M)=0

runOnModule - Virtual method overridden by subclasses to process the module being operated on.

A Module instance is used to store all the information related to an LLVM module.

A container for an operand bundle being viewed as a set of values rather than a set of uses.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

unsigned getAddressSpace() const

Return the address space of the Pointer type.

static PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Class to represent struct types.

static StructType * create(LLVMContext &Context, StringRef Name)

This creates an identified struct.

Target-Independent Code Generator Pass Configuration Options.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static IntegerType * getInt8Ty(LLVMContext &C)

static IntegerType * getInt32Ty(LLVMContext &C)

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)

Go through the uses list for this definition and make each use point to "New" if the callback ShouldRep...

iterator_range< use_iterator > uses()

StringRef getName() const

Return a constant reference to the value's name.

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

bool erase(const ValueT &V)

A raw_ostream that writes to an std::string.

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ BARRIER_SCOPE_WORKGROUP

bool isDynamicLDS(const GlobalVariable &GV)

void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)

Strip FnAttr attribute from any functions where we may have introduced its use.

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)

TargetExtType * isNamedBarrier(const GlobalVariable &GV)

bool isLDSVariableToLower(const GlobalVariable &GV)

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)

Align getAlign(const DataLayout &DL, const GlobalVariable *GV)

bool isKernelLDS(const Function *F)

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

bool operator<(int64_t V1, const APSInt &V2)

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

bool set_is_subset(const S1Ty &S1, const S2Ty &S2)

set_is_subset(A, B) - Return true iff A is a subset of B

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)

void sort(IteratorTy Start, IteratorTy End)

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

char & AMDGPULowerModuleLDSLegacyPassID

void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)

Removes global values from the llvm.used and llvm.compiler.used arrays.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)

Adds global values to the llvm.compiler.used list.

std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)

Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

constexpr unsigned BitWidth

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

const AMDGPUTargetMachine & TM

FunctionVariableMap direct_access

FunctionVariableMap indirect_access

This struct is a compact representation of a valid (non-zero power of two) alignment.

uint64_t value() const

This is a hole in the type system and should not be abused.