LLVM: lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp Source File
//===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates local data store (LDS) uses from non-kernel functions.
//
// LDS is allocated per kernel launch, so a non-kernel function has no fixed
// address for an LDS variable it uses. The pass rewrites such uses so that
// every access goes through memory that each calling kernel allocates itself,
// using one of the strategies selectable via
// -amdgpu-lower-module-lds-strategy:
//
//  - module: indirectly accessed variables are packed into a single
//    llvm.amdgcn.module.lds struct, allocated at address zero by every kernel
//    that (transitively) needs any of them.
//  - kernel: a variable reachable from exactly one kernel is folded into that
//    kernel's llvm.amdgcn.kernel.<name>.lds struct; the strategy aborts if a
//    variable is reachable from more than one kernel.
//  - table: a constant lookup table indexed by a per-kernel id
//    (llvm.amdgcn.lds.kernel.id) maps each variable to its address within the
//    currently executing kernel's frame.
//  - hybrid: the default; uses module scope for the most widely used
//    variable, kernel scope where only one kernel is involved, and the lookup
//    table for the rest.
//
// Dynamic (external, zero-sized) LDS variables are handled separately through
// a per-kernel representative instance and an offset table.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <vector>

#include <cstdio>

#define DEBUG_TYPE "amdgpu-lower-module-lds"

using namespace llvm;
using namespace AMDGPU;

namespace {

cl::opt<bool> SuperAlignLDSGlobals(
    "amdgpu-super-align-lds-globals",
    cl::desc("Increase alignment of LDS if it is not on align boundary"),
    cl::init(true), cl::Hidden);

enum class LoweringKind { module, table, kernel, hybrid };
cl::opt<LoweringKind> LoweringKindLoc(
    "amdgpu-lower-module-lds-strategy",
    cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,
    cl::init(LoweringKind::hybrid),
    cl::values(
        clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),
        clEnumValN(LoweringKind::module, "module", "Lower via module struct"),
        clEnumValN(
            LoweringKind::kernel, "kernel",
            "Lower variables reachable from one kernel, otherwise abort"),
        clEnumValN(LoweringKind::hybrid, "hybrid",
                   "Lower via mixture of above strategies")));

template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
  llvm::sort(V, [](const auto *L, const auto *R) {
    return L->getName() < R->getName();
  });
  return {std::move(V)};
}

class AMDGPULowerModuleLDS {
  const AMDGPUTargetMachine &TM;

  static void
  removeLocalVarsFromUsedLists(Module &M,
                               const DenseSet<GlobalVariable *> &LocalVars) {
    // Drop the variables that are about to be replaced from llvm.used and
    // llvm.compiler.used, then delete any dead constant users they still have.
    SmallPtrSet<Constant *, 8> LocalVarsSet;
    for (GlobalVariable *LocalVar : LocalVars)
      LocalVarsSet.insert(cast<Constant>(LocalVar));

    removeFromUsedLists(
        M, [&LocalVarsSet](Constant *C) { return LocalVarsSet.count(C); });

    for (GlobalVariable *LocalVar : LocalVars)
      LocalVar->removeDeadConstantUsers();
  }

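  // markUsedByKernel pins an LDS struct to a kernel. It emits, at the top of
  // the kernel entry block, a call to llvm.donothing that carries the struct
  // in an "ExplicitUse" operand bundle; the use survives optimisation so the
  // backend still allocates the corresponding LDS memory for that kernel.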
  static void markUsedByKernel(Function *Func, GlobalVariable *SGV) {
    BasicBlock *Entry = &Func->getEntryBlock();
    IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());

    Function *Decl = Intrinsic::getOrInsertDeclaration(
        Func->getParent(), Intrinsic::donothing, {});

    Value *UseInstance[1] = {
        Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};

    Builder.CreateCall(
        Decl, {}, {OperandBundleDef("ExplicitUse", UseInstance)});
  }

public:
  AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}

  struct LDSVariableReplacement {
    GlobalVariable *SGV = nullptr;
    DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
  };

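  // getAddressesOfVariablesInKernel builds one row of the lookup table: a
  // constant array with one i32 entry per variable, holding the address of
  // that variable inside the given kernel's frame (as a ptrtoint of the
  // corresponding struct field GEP), or poison if that kernel does not
  // allocate the variable.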
  static Constant *getAddressesOfVariablesInKernel(
      LLVMContext &Ctx, ArrayRef<GlobalVariable *> Variables,
      const DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP) {
    Type *I32 = Type::getInt32Ty(Ctx);

    ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size());

    SmallVector<Constant *> Elements;
    for (GlobalVariable *GV : Variables) {
      auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
      if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
        auto *elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32);
        Elements.push_back(elt);
      } else {
        Elements.push_back(PoisonValue::get(I32));
      }
    }
    return ConstantArray::get(KernelOffsetsType, Elements);
  }

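  // buildLookupTable assembles the full [NumberKernels x [NumberVariables x
  // i32]] constant array from the per-kernel rows above and emits it as an
  // internal constant global in the constant address space. Rows for kernels
  // with no replacement struct are poison.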
  static GlobalVariable *buildLookupTable(
      Module &M, ArrayRef<GlobalVariable *> Variables,
      ArrayRef<Function *> kernels,
      DenseMap<Function *, LDSVariableReplacement> &KernelToReplacement) {
    if (Variables.empty()) {
      return nullptr;
    }
    LLVMContext &Ctx = M.getContext();

    const size_t NumberVariables = Variables.size();
    const size_t NumberKernels = kernels.size();

    ArrayType *KernelOffsetsType =
        ArrayType::get(Type::getInt32Ty(Ctx), NumberVariables);

    ArrayType *AllKernelsOffsetsType =
        ArrayType::get(KernelOffsetsType, NumberKernels);

    Constant *Missing = PoisonValue::get(KernelOffsetsType);
    std::vector<Constant *> overallConstantExprElts(NumberKernels);
    for (size_t i = 0; i < NumberKernels; i++) {
      auto Replacement = KernelToReplacement.find(kernels[i]);
      overallConstantExprElts[i] =
          (Replacement == KernelToReplacement.end())
              ? Missing
              : getAddressesOfVariablesInKernel(
                    Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
    }

    Constant *init =
        ConstantArray::get(AllKernelsOffsetsType, overallConstantExprElts);

    return new GlobalVariable(
        M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, init,
        "llvm.amdgcn.lds.offset.table", nullptr, GlobalValue::NotThreadLocal,
        AMDGPUAS::CONSTANT_ADDRESS);
  }

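  // replaceUseWithTableLookup rewrites a single use of an LDS global into:
  //   id   = llvm.amdgcn.lds.kernel.id (cached per function)
  //   addr = load i32 from LookupTable[id][Index]
  //   ptr  = inttoptr addr
  // PHI uses insert into the corresponding incoming block; all other uses
  // insert immediately before the using instruction.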
  void replaceUseWithTableLookup(Module &M, IRBuilder<> &Builder,
                                 GlobalVariable *LookupTable,
                                 GlobalVariable *GV, Use &U,
                                 Value *OptionalIndex) {
    // Table is a constant array of the same length as OrderedKernels
    LLVMContext &Ctx = M.getContext();
    Type *I32 = Type::getInt32Ty(Ctx);
    auto *I = cast<Instruction>(U.getUser());

    Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());

    if (auto *Phi = dyn_cast<PHINode>(I)) {
      BasicBlock *BB = Phi->getIncomingBlock(U);
      Builder.SetInsertPoint(&(*(BB->getFirstInsertionPt())));
    } else {
      Builder.SetInsertPoint(I);
    }

    SmallVector<Value *, 3> GEPIdx = {
        ConstantInt::get(I32, 0),
        tableKernelIndex,
    };
    if (OptionalIndex)
      GEPIdx.push_back(OptionalIndex);

    Value *Address = Builder.CreateInBoundsGEP(
        LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());

    Value *loaded = Builder.CreateLoad(I32, Address);

    Value *replacement =
        Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());

    U.set(replacement);
  }

  void replaceUsesInInstructionsWithTableLookup(
      Module &M, ArrayRef<GlobalVariable *> ModuleScopeVariables,
      GlobalVariable *LookupTable) {
    LLVMContext &Ctx = M.getContext();
    IRBuilder<> Builder(Ctx);
    Type *I32 = Type::getInt32Ty(Ctx);

    for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {
      auto *GV = ModuleScopeVariables[Index];

      for (Use &U : make_early_inc_range(GV->uses())) {
        auto *I = dyn_cast<Instruction>(U.getUser());
        if (!I)
          continue;

        replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
                                  ConstantInt::get(I32, Index));
      }
    }
  }

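  // A kernel needs to allocate the backing storage for a class of LDS (module
  // struct, lookup table, dynamic) if any variable of that class is reachable
  // from it through calls. This helper returns the set of kernels whose
  // indirect access set intersects VariableSet.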
  static DenseSet<Function *> kernelsThatIndirectlyAccessAnyOfPassedVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<GlobalVariable *> const &VariableSet) {
    DenseSet<Function *> KernelSet;

    if (VariableSet.empty())
      return KernelSet;

    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernel(Func.getCallingConv()))
        continue;
      for (GlobalVariable *GV : LDSUsesInfo.indirect_access[&Func]) {
        if (VariableSet.contains(GV)) {
          KernelSet.insert(&Func);
          break;
        }
      }
    }

    return KernelSet;
  }

  static GlobalVariable *
  chooseBestVariableForModuleStrategy(const DataLayout &DL,
                                      VariableFunctionMap &LDSVars) {
    // Find the global variable with the most indirect uses from kernels.

    struct CandidateTy {
      GlobalVariable *GV = nullptr;
      size_t UserCount = 0;
      size_t Size = 0;

      CandidateTy() = default;

      CandidateTy(GlobalVariable *GV, size_t UserCount, size_t AllocSize)
          : GV(GV), UserCount(UserCount), Size(AllocSize) {}

      bool operator<(const CandidateTy &Other) const {
        // Fewer users makes module scope variable less desirable
        if (UserCount < Other.UserCount) {
          return true;
        }
        if (UserCount > Other.UserCount) {
          return false;
        }

        // Bigger makes module scope variable less desirable
        if (Size < Other.Size) {
          return false;
        }

        if (Size > Other.Size) {
          return true;
        }

        // Arbitrary but consistent
        return GV->getName() < Other.GV->getName();
      }
    };

    CandidateTy MostUsed;

    for (auto &K : LDSVars) {
      GlobalVariable *GV = K.first;
      if (K.second.size() <= 1) {
        // A variable reachable by only one kernel is best lowered with the
        // kernel strategy.
        continue;
      }
      CandidateTy Candidate(
          GV, K.second.size(),
          DL.getTypeAllocSize(GV->getValueType()).getFixedValue());
      if (MostUsed < Candidate)
        MostUsed = Candidate;
    }

    return MostUsed.GV;
  }

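  // Frame layout decisions are communicated to the backend by attaching
  // !absolute_symbol metadata to each struct: the half-open range
  // [Address, Address + 1) records where the struct starts within the
  // kernel's LDS allocation.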
  static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
                                       uint32_t Address) {
    LLVMContext &Ctx = M->getContext();
    auto *IntTy =
        M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
    GV->setMetadata(LLVMContext::MD_absolute_symbol,
                    MDNode::get(Ctx, {MinC, MaxC}));
  }

  DenseMap<Function *, Value *> tableKernelIndexCache;
  Value *getTableLookupKernelIndex(Module &M, Function *F) {
    // The kernel id intrinsic lowers to a read from a preloaded SGPR, so one
    // call per function is sufficient; cache and reuse it.
    auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);
    if (Inserted) {
      auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
      IRBuilder<> Builder(&*InsertAt);

      It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
    }

    return It->second;
  }

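  // Kernels that use either the variable lookup table or dynamic LDS need a
  // numeric identity. assignLDSKernelIDToEachKernel orders those kernels by
  // name and annotates each with !llvm.amdgcn.lds.kernel.id metadata; the
  // backend materialises that id as the value returned by
  // llvm.amdgcn.lds.kernel.id within the kernel.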
  static std::vector<Function *> assignLDSKernelIDToEachKernel(
      Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS,
      DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS) {
    std::vector<Function *> OrderedKernels;
    if (!KernelsThatAllocateTableLDS.empty() ||
        !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

      for (Function &Func : M->functions()) {
        if (Func.isDeclaration())
          continue;
        if (!isKernel(Func.getCallingConv()))
          continue;

        if (KernelsThatAllocateTableLDS.contains(&Func) ||
            KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {
          assert(Func.hasName()); // else fatal error earlier
          OrderedKernels.push_back(&Func);
        }
      }

      // Put them in an arbitrary but reproducible order
      OrderedKernels = sortByName(std::move(OrderedKernels));

      // Annotate the kernels with their order in this vector
      LLVMContext &Ctx = M->getContext();
      IRBuilder<> Builder(Ctx);

      if (OrderedKernels.size() > UINT32_MAX) {
        // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU
        reportFatalUsageError("Unimplemented LDS lowering for > 2**32 kernels");
      }

      for (size_t i = 0; i < OrderedKernels.size(); i++) {
        Metadata *AttrMDArgs[1] = {
            ConstantAsMetadata::get(Builder.getInt32(i)),
        };
        OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",
                                       MDNode::get(Ctx, AttrMDArgs));
      }
    }
    return OrderedKernels;
  }

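  // Each indirectly accessed variable is assigned to exactly one lowering
  // strategy. Dynamic LDS always takes its own path. Under the hybrid
  // default, the most widely used variable becomes the module struct root,
  // variables reachable from a single kernel use the kernel struct, variables
  // reachable from exactly the same kernels as the root share the module
  // struct, and everything else goes through the lookup table.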
  static void partitionVariablesIntoIndirectStrategies(
      Module &M, LDSUsesInfoTy const &LDSUsesInfo,
      VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
      DenseSet<GlobalVariable *> &ModuleScopeVariables,
      DenseSet<GlobalVariable *> &TableLookupVariables,
      DenseSet<GlobalVariable *> &KernelAccessVariables,
      DenseSet<GlobalVariable *> &DynamicVariables) {
    GlobalVariable *HybridModuleRoot =
        LoweringKindLoc != LoweringKind::hybrid
            ? nullptr
            : chooseBestVariableForModuleStrategy(
                  M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);

    DenseSet<Function *> const EmptySet;
    DenseSet<Function *> const &HybridModuleRootKernels =
        HybridModuleRoot
            ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
            : EmptySet;

    for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
      // Each iteration of this loop assigns exactly one global variable to
      // exactly one of the implementation strategies.

      GlobalVariable *GV = K.first;
      assert(AMDGPU::isLDSVariableToLower(*GV));
      assert(K.second.size() != 0);

      if (AMDGPU::isDynamicLDS(*GV)) {
        DynamicVariables.insert(GV);
        continue;
      }

      switch (LoweringKindLoc) {
      case LoweringKind::module:
        ModuleScopeVariables.insert(GV);
        break;

      case LoweringKind::table:
        TableLookupVariables.insert(GV);
        break;

      case LoweringKind::kernel:
        if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);
        } else {
          reportFatalUsageError(
              "cannot lower LDS '" + GV->getName() +
              "' to kernel access as it is reachable from multiple kernels");
        }
        break;

      case LoweringKind::hybrid: {
        if (GV == HybridModuleRoot) {
          assert(K.second.size() != 1);
          ModuleScopeVariables.insert(GV);
        } else if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);
        } else if (K.second == HybridModuleRootKernels) {
          ModuleScopeVariables.insert(GV);
        } else {
          TableLookupVariables.insert(GV);
        }
        break;
      }
      }
    }

    // All LDS variables accessed indirectly have now been partitioned into
    // the distinct lowering strategies.
    assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
               KernelAccessVariables.size() + DynamicVariables.size() ==
           LDSToKernelsThatNeedToAccessItIndirectly.size());
  }

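  // The module scope struct packs ModuleScopeVariables into
  // llvm.amdgcn.module.lds, pinned at LDS address zero. Uses from non-kernel
  // functions are always rewritten to the struct; uses from a kernel are only
  // rewritten when that kernel allocates the struct, and such kernels also
  // get the llvm.donothing marker so the allocation survives.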
  static GlobalVariable *lowerModuleScopeStructVariables(
      Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
      DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
    if (ModuleScopeVariables.empty()) {
      return nullptr;
    }

    LLVMContext &Ctx = M.getContext();

    LDSVariableReplacement ModuleScopeReplacement =
        createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                     ModuleScopeVariables);

    appendToCompilerUsed(M, {static_cast<GlobalValue *>(
                                ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                                    cast<Constant>(ModuleScopeReplacement.SGV),
                                    PointerType::getUnqual(Ctx)))});

    // module.lds will be allocated at zero in any kernel that allocates it
    recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);

    // historic
    removeLocalVarsFromUsedLists(M, ModuleScopeVariables);

    // Replace all uses of module scope variable from non-kernel functions
    replaceLDSVariablesWithStruct(
        M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
          Instruction *I = dyn_cast<Instruction>(U.getUser());
          if (!I) {
            return false;
          }
          Function *F = I->getFunction();
          return !isKernel(F->getCallingConv());
        });

    // Replace uses of module scope variable from kernel functions that
    // allocate the module scope variable, otherwise leave them unchanged
    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernel(Func.getCallingConv()))
        continue;

      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
        replaceLDSVariablesWithStruct(
            M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
              Instruction *I = dyn_cast<Instruction>(U.getUser());
              if (!I) {
                return false;
              }
              Function *F = I->getFunction();
              return F == &Func;
            });

        markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
      }
    }

    return ModuleScopeReplacement.SGV;
  }

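  // Every kernel that still has static LDS to allocate after module-scope
  // lowering gets its own llvm.amdgcn.kernel.<name>.lds struct holding the
  // variables it touches directly or through callees (minus anything already
  // placed in the module struct and minus dynamic LDS), and its uses within
  // that kernel are rewritten to fields of the struct.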
  static DenseMap<Function *, LDSVariableReplacement>
  lowerKernelScopeStructVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<GlobalVariable *> const &ModuleScopeVariables,
      DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
      GlobalVariable *MaybeModuleScopeStruct) {
    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernel(Func.getCallingConv()))
        continue;

      DenseSet<GlobalVariable *> KernelUsedVariables;
      // Allocating variables that are used directly in this struct to get
      // alignment aware allocation and predictable frame size.
      for (auto &v : LDSUsesInfo.direct_access[&Func]) {
        if (!AMDGPU::isDynamicLDS(*v)) {
          KernelUsedVariables.insert(v);
        }
      }

      // Allocating variables that are accessed indirectly so that a lookup of
      // this struct instance can find them from nested functions.
      for (auto &v : LDSUsesInfo.indirect_access[&Func]) {
        if (!AMDGPU::isDynamicLDS(*v)) {
          KernelUsedVariables.insert(v);
        }
      }

      // Variables allocated in module lds must all resolve to that struct,
      // not to the per-kernel instance.
      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
        for (GlobalVariable *v : ModuleScopeVariables) {
          KernelUsedVariables.erase(v);
        }
      }

      if (KernelUsedVariables.empty()) {
        // Either used no LDS, or the LDS it used was all in the module struct
        // or dynamic.
        continue;
      }

      // The association between kernel function and LDS struct is done by
      // symbol name, which only works if the function in question has a name.
      if (!Func.hasName()) {
        reportFatalUsageError("Anonymous kernels cannot use LDS variables");
      }

      std::string VarName =
          (Twine("llvm.amdgcn.kernel.") + Func.getName() + ".lds").str();

      auto Replacement =
          createLDSVariableReplacement(M, VarName, KernelUsedVariables);

      // If there are any indirect uses, create a direct use to ensure
      // allocation.
      auto Accesses = LDSUsesInfo.indirect_access.find(&Func);
      if ((Accesses != LDSUsesInfo.indirect_access.end()) &&
          !Accesses->second.empty())
        markUsedByKernel(&Func, Replacement.SGV);

      // Remove the replaced variables from the used lists.
      removeLocalVarsFromUsedLists(M, KernelUsedVariables);
      KernelToReplacement[&Func] = Replacement;

      // Rewrite uses within kernel to the new struct.
      replaceLDSVariablesWithStruct(
          M, KernelUsedVariables, Replacement, [&Func](Use &U) {
            Instruction *I = dyn_cast<Instruction>(U.getUser());
            return I && I->getFunction() == &Func;
          });
    }
    return KernelToReplacement;
  }

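  // Dynamic (extern, zero sized) LDS variables cannot be laid out statically.
  // Instead, each kernel that reaches any of them gets a single
  // representative llvm.amdgcn.<kernel>.dynlds array, aligned to the maximum
  // alignment of any dynamic variable reachable from that kernel, and placed
  // immediately after the kernel's static LDS.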
  static GlobalVariable *
  buildRepresentativeDynamicLDSInstance(Module &M, LDSUsesInfoTy &LDSUsesInfo,
                                        Function *func) {
    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();
    Align MaxDynamicAlignment(1);

    auto UpdateMaxAlignment = [&MaxDynamicAlignment, &DL](GlobalVariable *GV) {
      if (AMDGPU::isDynamicLDS(*GV)) {
        MaxDynamicAlignment =
            std::max(MaxDynamicAlignment, AMDGPU::getAlign(DL, GV));
      }
    };

    for (GlobalVariable *GV : LDSUsesInfo.indirect_access[func]) {
      UpdateMaxAlignment(GV);
    }

    for (GlobalVariable *GV : LDSUsesInfo.direct_access[func]) {
      UpdateMaxAlignment(GV);
    }

    assert(func->hasName()); // Checked by caller

    ArrayType *T = ArrayType::get(Type::getInt8Ty(Ctx), 0);
    GlobalVariable *N = new GlobalVariable(
        M, T, false, GlobalValue::ExternalLinkage, nullptr,
        Twine("llvm.amdgcn." + func->getName() + ".dynlds"), nullptr,
        GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
        false);
    N->setAlignment(MaxDynamicAlignment);

    assert(AMDGPU::isDynamicLDS(*N));
    return N;
  }

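  // lowerDynamicLDSVariables builds llvm.amdgcn.dynlds.offset.table, one i32
  // per ordered kernel holding the address of that kernel's representative
  // dynamic LDS instance (poison where the kernel has none), then rewrites
  // every use of a dynamic variable from a non-kernel function into a lookup
  // through that table, keyed by llvm.amdgcn.lds.kernel.id.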
  DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
      DenseSet<GlobalVariable *> const &DynamicVariables,
      std::vector<Function *> const &OrderedKernels) {
    DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS;
    if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
      LLVMContext &Ctx = M.getContext();
      IRBuilder<> Builder(Ctx);
      Type *I32 = Type::getInt32Ty(Ctx);

      std::vector<Constant *> newDynamicLDS;

      // Table is built in the same order as OrderedKernels
      for (auto &func : OrderedKernels) {
        if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {
          assert(isKernel(func->getCallingConv()));
          if (!func->hasName()) {
            reportFatalUsageError("Anonymous kernels cannot use LDS variables");
          }

          GlobalVariable *N =
              buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);

          KernelToCreatedDynamicLDS[func] = N;

          markUsedByKernel(func, N);

          auto *emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);
          auto *GEP = ConstantExpr::getGetElementPtr(
              emptyCharArray, N, ConstantInt::get(I32, 0), true);
          newDynamicLDS.push_back(ConstantExpr::getPtrToInt(GEP, I32));
        } else {
          newDynamicLDS.push_back(PoisonValue::get(I32));
        }
      }
      assert(OrderedKernels.size() == newDynamicLDS.size());

      ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
      Constant *init = ConstantArray::get(t, newDynamicLDS);
      GlobalVariable *table = new GlobalVariable(
          M, t, true, GlobalValue::InternalLinkage, init,
          "llvm.amdgcn.dynlds.offset.table", nullptr,
          GlobalValue::NotThreadLocal, AMDGPUAS::CONSTANT_ADDRESS);

      for (GlobalVariable *GV : DynamicVariables) {
        for (Use &U : make_early_inc_range(GV->uses())) {
          auto *I = dyn_cast<Instruction>(U.getUser());
          if (!I)
            continue;
          if (isKernel(I->getFunction()->getCallingConv()))
            continue;

          replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);
        }
      }
    }
    return KernelToCreatedDynamicLDS;
  }

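  // runOnModule drives the whole lowering: super-align globals, fold constant
  // expression uses into instructions, compute per-kernel LDS usage, pick a
  // strategy per variable, build the module / kernel structs and the lookup
  // and dynamic-LDS tables, then compute each kernel's frame layout and
  // record it via absolute_symbol metadata and the amdgpu-lds-size attribute.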
  bool runOnModule(Module &M) {
    CallGraph CG = CallGraph(M);
    bool Changed = superAlignLDSGlobals(M);

    Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);

    Changed = true; // todo: narrow this down

    // For each kernel, what variables does it access directly or through
    // callees
    LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);

    // For each variable accessed through callees, which kernels access it
    VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
    for (auto &K : LDSUsesInfo.indirect_access) {
      Function *F = K.first;
      for (GlobalVariable *GV : K.second) {
        LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
      }
    }

    // Partition variables accessed indirectly into the different strategies
    DenseSet<GlobalVariable *> ModuleScopeVariables;
    DenseSet<GlobalVariable *> TableLookupVariables;
    DenseSet<GlobalVariable *> KernelAccessVariables;
    DenseSet<GlobalVariable *> DynamicVariables;
    partitionVariablesIntoIndirectStrategies(
        M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
        ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
        DynamicVariables);

    // If a kernel accesses a variable of a given strategy through a call then
    // that kernel needs to allocate the backing storage for that strategy.
    const DenseSet<Function *> KernelsThatAllocateModuleLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        ModuleScopeVariables);
    const DenseSet<Function *> KernelsThatAllocateTableLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        TableLookupVariables);
    const DenseSet<Function *> KernelsThatIndirectlyAllocateDynamicLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        DynamicVariables);

    GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
        M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);

    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
        lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
                                        KernelsThatAllocateModuleLDS,
                                        MaybeModuleScopeStruct);

    // Lower zero cost accesses to the kernel instances just created
    for (auto &GV : KernelAccessVariables) {
      auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
      assert(funcs.size() == 1); // Only one kernel can access it
      LDSVariableReplacement Replacement =
          KernelToReplacement[*(funcs.begin())];

      DenseSet<GlobalVariable *> Vec;
      Vec.insert(GV);

      replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {
        return isa<Instruction>(U.getUser());
      });
    }

    // The ith element of this vector is kernel id i
    std::vector<Function *> OrderedKernels =
        assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
                                      KernelsThatIndirectlyAllocateDynamicLDS);

    if (!KernelsThatAllocateTableLDS.empty()) {
      // The order must be consistent between the lookup table and the
      // accesses into it.
      auto TableLookupVariablesOrdered =
          sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),
                                                   TableLookupVariables.end()));

      GlobalVariable *LookupTable = buildLookupTable(
          M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
      replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
                                               LookupTable);
    }

    DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
        lowerDynamicLDSVariables(M, LDSUsesInfo,
                                 KernelsThatIndirectlyAllocateDynamicLDS,
                                 DynamicVariables, OrderedKernels);

    // Strip amdgpu-no-lds-kernel-id from all functions reachable from these
    // kernels; the attribute may have been inferred before this pass
    // introduced uses of the intrinsic.
    for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
                            &KernelsThatAllocateTableLDS})
      for (Function *F : *KernelSet)
        removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});

    // All kernel frames have been allocated. Calculate and record the
    // addresses.
    {
      const DataLayout &DL = M.getDataLayout();

      for (Function &Func : M.functions()) {
        if (Func.isDeclaration() || !isKernel(Func.getCallingConv()))
          continue;

        // All three of these are optional. The first variable is allocated at
        // zero. They are allocated as one contiguous block:
        // {
        //   module.lds
        //   alignment padding
        //   kernel instance
        //   alignment padding
        //   dynamic lds variables
        // }

        const bool AllocateModuleScopeStruct =
            MaybeModuleScopeStruct &&
            KernelsThatAllocateModuleLDS.contains(&Func);

        auto Replacement = KernelToReplacement.find(&Func);
        const bool AllocateKernelScopeStruct =
            Replacement != KernelToReplacement.end();

        const bool AllocateDynamicVariable =
            KernelToCreatedDynamicLDS.contains(&Func);

        uint32_t Offset = 0;

        if (AllocateModuleScopeStruct) {
          // Allocated at zero, recorded once on construction, not once per
          // kernel
          Offset += DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
        }

        if (AllocateKernelScopeStruct) {
          GlobalVariable *KernelStruct = Replacement->second.SGV;
          Offset = alignTo(Offset, AMDGPU::getAlign(DL, KernelStruct));
          recordLDSAbsoluteAddress(&M, KernelStruct, Offset);
          Offset += DL.getTypeAllocSize(KernelStruct->getValueType());
        }

        // If there is dynamic LDS then the alignment it needs is included in
        // the static frame size; the dynamic LDS starts directly after it.
        if (AllocateDynamicVariable) {
          GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
          Offset = alignTo(Offset, AMDGPU::getAlign(DL, DynamicVariable));
          recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
        }

        if (Offset != 0) {
          (void)TM; // TODO: Account for target maximum LDS
          std::string Buffer;
          raw_string_ostream SS{Buffer};
          SS << format("%u", Offset);

          // Annotating kernels with a min==max LDS size prevents later
          // allocation of further static LDS that would collide with the
          // addresses assigned to dynamic variables.
          if (AllocateDynamicVariable)
            SS << format(",%u", Offset);

          Func.addFnAttr("amdgpu-lds-size", Buffer);
        }
      }
    }

    for (auto &GV : make_early_inc_range(M.globals()))
      if (AMDGPU::isLDSVariableToLower(GV)) {
        // Variables whose uses were all rewritten can be deleted.
        GV.removeDeadConstantUsers();
        if (GV.use_empty())
          GV.eraseFromParent();
      }

    return Changed;
  }

private:
  // Increase the alignment of LDS globals if necessary to maximise the chance
  // that we can use aligned LDS instructions to access them.
  static bool superAlignLDSGlobals(Module &M) {
    const DataLayout &DL = M.getDataLayout();
    bool Changed = false;
    if (!SuperAlignLDSGlobals) {
      return Changed;
    }

    for (auto &GV : M.globals()) {
      if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
        // Only changing alignment of LDS variables
        continue;
      }
      if (!GV.hasInitializer()) {
        // cuda/hip extern __shared__ variable, leave alignment alone
        continue;
      }

      if (GV.isAbsoluteSymbolRef()) {
        // If the variable is already allocated, don't change the alignment
        continue;
      }

      Align Alignment = AMDGPU::getAlign(DL, &GV);
      TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());

      if (GVSize > 8) {
        // We might want to use a b96 or b128 load/store
        Alignment = std::max(Alignment, Align(16));
      } else if (GVSize > 4) {
        // We might want to use a b64 load/store
        Alignment = std::max(Alignment, Align(8));
      } else if (GVSize > 2) {
        // We might want to use a b32 load/store
        Alignment = std::max(Alignment, Align(4));
      } else if (GVSize > 1) {
        // We might want to use a b16 load/store
        Alignment = std::max(Alignment, Align(2));
      }

      if (Alignment != AMDGPU::getAlign(DL, &GV)) {
        Changed = true;
        GV.setAlignment(Alignment);
      }
    }
    return Changed;
  }

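  // createLDSVariableReplacement packs a set of LDS variables into a single
  // named struct. Fields are ordered by performOptimizedStructLayout to
  // minimise padding; explicit padding arrays are inserted where alignment
  // still requires it and erased again once the constant field GEPs have
  // been recorded. The returned map sends each original variable to its GEP
  // into the new struct.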
  static LDSVariableReplacement createLDSVariableReplacement(
      Module &M, std::string VarName,
      DenseSet<GlobalVariable *> const &LDSVarsToTransform) {
    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();
    assert(!LDSVarsToTransform.empty());

    SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
    LayoutFields.reserve(LDSVarsToTransform.size());
    {
      // The order of fields in this struct depends on the order of variables
      // in the argument, which varies when changing how they are identified;
      // sort to get a consistent order.
      auto Sorted = sortByName(std::vector<GlobalVariable *>(
          LDSVarsToTransform.begin(), LDSVarsToTransform.end()));

      for (GlobalVariable *GV : Sorted) {
        OptimizedStructLayoutField F(GV,
                                     DL.getTypeAllocSize(GV->getValueType()),
                                     AMDGPU::getAlign(DL, GV));
        LayoutFields.emplace_back(F);
      }
    }

    performOptimizedStructLayout(LayoutFields);

    std::vector<GlobalVariable *> LocalVars;
    BitVector IsPaddingField;
    LocalVars.reserve(LDSVarsToTransform.size());
    IsPaddingField.reserve(LDSVarsToTransform.size());
    {
      uint64_t CurrentOffset = 0;
      for (auto &F : LayoutFields) {
        GlobalVariable *FGV =
            static_cast<GlobalVariable *>(const_cast<void *>(F.Id));
        Align DataAlign = F.Alignment;

        uint64_t DataAlignV = DataAlign.value();
        if (uint64_t Rem = CurrentOffset % DataAlignV) {
          uint64_t Padding = DataAlignV - Rem;

          // Append an array of padding bytes to meet the requested alignment.
          ArrayType *PadTy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
          LocalVars.push_back(new GlobalVariable(
              M, PadTy, false, GlobalValue::InternalLinkage,
              PoisonValue::get(PadTy), "", nullptr,
              GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false));
          IsPaddingField.push_back(true);
          CurrentOffset += Padding;
        }

        LocalVars.push_back(FGV);
        IsPaddingField.push_back(false);
        CurrentOffset += F.Size;
      }
    }

    std::vector<Type *> LocalVarTypes;
    LocalVarTypes.reserve(LocalVars.size());
    std::transform(
        LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
        [](const GlobalVariable *V) -> Type * { return V->getValueType(); });

    StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");

    Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]);

    GlobalVariable *SGV = new GlobalVariable(
        M, LDSTy, false, GlobalValue::InternalLinkage, PoisonValue::get(LDSTy),
        VarName, nullptr, GlobalValue::NotThreadLocal,
        AMDGPUAS::LOCAL_ADDRESS, false);
    SGV->setAlignment(StructAlign);

    DenseMap<GlobalVariable *, Constant *> Map;
    Type *I32 = Type::getInt32Ty(Ctx);
    for (size_t I = 0; I < LocalVars.size(); I++) {
      GlobalVariable *GV = LocalVars[I];
      Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
      Constant *GEP = ConstantExpr::getGetElementPtr(SGV->getValueType(), SGV,
                                                     GEPIdx, true);
      if (IsPaddingField[I]) {
        assert(GV->use_empty());
        GV->eraseFromParent();
      } else {
        Map[GV] = GEP;
      }
    }
    assert(Map.size() == LDSVarsToTransform.size());
    return {SGV, std::move(Map)};
  }

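  // replaceLDSVariablesWithStruct swaps uses of the original globals (where
  // Predicate accepts the use) for the corresponding struct field GEPs. Each
  // field gets its own anonymous alias scope so accesses to different fields
  // of the same struct can still be disambiguated after the rewrite, and the
  // known field offsets are used to tighten alignment on the accesses.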
  template <typename PredicateTy>
  static void replaceLDSVariablesWithStruct(
      Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
      const LDSVariableReplacement &Replacement, PredicateTy Predicate) {
    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();

    // We need to insert the aliasing info in a predictable order for lit
    // tests, so sort the variables by name.
    auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
        LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));

    // Create alias.scope and their lists. Each field in the new structure
    // does not alias with all other fields.
    SmallVector<MDNode *> AliasScopes;
    SmallVector<Metadata *> NoAliasList;
    const size_t NumberVars = LDSVarsToTransform.size();
    if (NumberVars > 1) {
      MDBuilder MDB(Ctx);
      AliasScopes.reserve(NumberVars);
      MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
      for (size_t I = 0; I < NumberVars; I++) {
        AliasScopes.push_back(MDB.createAnonymousAliasScope(Domain));
      }
      NoAliasList.append(&AliasScopes[1], AliasScopes.end());
    }

    // Replace uses of ith variable with a constantexpr to the corresponding
    // field of the instance that will be allocated by AMDGPUMachineFunction.
    for (size_t I = 0; I < NumberVars; I++) {
      GlobalVariable *GV = LDSVarsToTransform[I];
      Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);

      GV->replaceUsesWithIf(GEP, Predicate);

      APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);
      uint64_t Offset = APOff.getZExtValue();

      Align A =
          commonAlignment(Replacement.SGV->getAlign().valueOrOne(), Offset);

      if (I)
        NoAliasList[I - 1] = AliasScopes[I - 1];
      MDNode *NoAlias =
          NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
      MDNode *AliasScope =
          AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});

      refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
    }
  }

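  // refineUsesAlignmentAndAA walks the users of a struct-field GEP (to a
  // small fixed depth), attaching the alias.scope / noalias metadata computed
  // above to memory operations and raising the alignment on loads, stores,
  // atomics and nested GEPs where the constant offset allows it.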
  static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
                                       const DataLayout &DL,
                                       MDNode *AliasScope, MDNode *NoAlias,
                                       unsigned MaxDepth = 5) {
    if (!MaxDepth || (A == 1 && !AliasScope))
      return;

    for (User *U : Ptr->users()) {
      if (auto *I = dyn_cast<Instruction>(U)) {
        if (AliasScope && I->mayReadOrWriteMemory()) {
          MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
          AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
                   : AliasScope);
          I->setMetadata(LLVMContext::MD_alias_scope, AS);

          MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);

          // Noalias scopes on the instruction may come from two domains: the
          // LDS domain created by this pass and pre-existing domains (for
          // example from noalias parameters). Merge carefully so that
          // neither set of scopes is dropped.
          SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
          ScopedNoAliasAAResult::collectScopedDomains(NA, ExistingDomains);
          ScopedNoAliasAAResult::collectScopedDomains(NoAlias, LDSDomains);
          auto Intersection = set_intersection(ExistingDomains, LDSDomains);
          if (Intersection.empty()) {
            NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias;
          } else {
            NA = MDNode::intersect(NA, NoAlias);
          }
          I->setMetadata(LLVMContext::MD_noalias, NA);
        }
      }

      if (auto *LI = dyn_cast<LoadInst>(U)) {
        LI->setAlignment(std::max(A, LI->getAlign()));
        continue;
      }
      if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getPointerOperand() == Ptr)
          SI->setAlignment(std::max(A, SI->getAlign()));
        continue;
      }
      if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
        // None of atomicrmw operations can work on pointers, but let's
        // check it anyway in case it will or we will process ConstantExpr.
        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));
        continue;
      }
      if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));
        continue;
      }
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
        APInt Off(BitWidth, 0);
        if (GEP->getPointerOperand() == Ptr) {
          Align GA;
          if (GEP->accumulateConstantOffset(DL, Off))
            GA = commonAlignment(A, Off.getLimitedValue());
          refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
                                   MaxDepth - 1);
        }
        continue;
      }
      if (auto *I = dyn_cast<Instruction>(U)) {
        if (I->getOpcode() == Instruction::BitCast ||
            I->getOpcode() == Instruction::AddrSpaceCast)
          refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
      }
    }
  }
};

class AMDGPULowerModuleLDSLegacy : public ModulePass {
public:
  const AMDGPUTargetMachine *TM;
  static char ID;

  AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr)
      : ModulePass(ID), TM(TM_) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    if (!TM)
      AU.addRequired<TargetPassConfig>();
  }

  bool runOnModule(Module &M) override {
    if (!TM) {
      auto &TPC = getAnalysis<TargetPassConfig>();
      TM = &TPC.getTM<AMDGPUTargetMachine>();
    }

    return AMDGPULowerModuleLDS(*TM).runOnModule(M);
  }
};

} // namespace

char AMDGPULowerModuleLDSLegacy::ID = 0;

char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID;

INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                      "Lower uses of LDS variables from non-kernel functions",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                    "Lower uses of LDS variables from non-kernel functions",
                    false, false)

ModulePass *
llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
  return new AMDGPULowerModuleLDSLegacy(TM);
}

PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
                                                ModuleAnalysisManager &) {
  return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none()
                                                 : PreservedAnalyses::all();
}