LLVM: lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp Source File
#include "llvm/IR/IntrinsicsAMDGPU.h"

#include <vector>

#define DEBUG_TYPE "amdgpu-lower-module-lds"

using namespace llvm;
using namespace AMDGPU;

namespace {

cl::opt<bool> SuperAlignLDSGlobals(
    "amdgpu-super-align-lds-globals",
    cl::desc("Increase alignment of LDS if it is not on align boundary"),
    cl::init(true), cl::Hidden);

enum class LoweringKind { module, table, kernel, hybrid };

cl::opt<LoweringKind> LoweringKindLoc(
    "amdgpu-lower-module-lds-strategy",
    cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,
    cl::init(LoweringKind::hybrid),
    cl::values(
        clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),
        clEnumValN(LoweringKind::module, "module", "Lower via module struct"),
        clEnumValN(
            LoweringKind::kernel, "kernel",
            "Lower variables reachable from one kernel, otherwise abort"),
        clEnumValN(LoweringKind::hybrid, "hybrid",
                   "Lower via mixture of above strategies")));

template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
  llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
    return L->getName() < R->getName();
  });
  return {std::move(V)};
}

class AMDGPULowerModuleLDS {
  const AMDGPUTargetMachine &TM;

  static void
  removeLocalVarsFromUsedLists(Module &M,
                               const DenseSet<GlobalVariable *> &LocalVars) {
    // The verifier rejects used lists containing an inttoptr of a constant,
    // so remove the variables from those lists before replacing them.
    SmallPtrSet<Constant *, 8> LocalVarsSet;
    for (GlobalVariable *LocalVar : LocalVars)
      LocalVarsSet.insert(cast<Constant>(LocalVar->stripPointerCasts()));

    removeFromUsedLists(
        M, [&LocalVarsSet](Constant *C) { return LocalVarsSet.count(C); });

    for (GlobalVariable *LocalVar : LocalVars)
      LocalVar->removeDeadConstantUsers();
  }

  // Record an explicit use of SGV in the entry block of Func via an operand
  // bundle on llvm.donothing, so that later passes account for the LDS this
  // kernel allocates even though no instruction references it directly.
  static void markUsedByKernel(Function *Func, GlobalVariable *SGV) {
    BasicBlock *Entry = &Func->getEntryBlock();
    IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());

    Function *Decl = Intrinsic::getOrInsertDeclaration(
        Func->getParent(), Intrinsic::donothing, {});

    Value *UseInstance[1] = {
        Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};

    Builder.CreateCall(
        Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
  }

public:
  AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}

  struct LDSVariableReplacement {
    GlobalVariable *SGV = nullptr;
    DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
  };

  // Create a ConstantArray of i32 offsets, one per variable, holding the
  // address of that variable within the kernel's LDS frame (as a ptrtoint of
  // the constant GEP), or poison if this kernel does not allocate it.
  static Constant *getAddressesOfVariablesInKernel(
      LLVMContext &Ctx, ArrayRef<GlobalVariable *> Variables,
      const DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP) {
    Type *I32 = Type::getInt32Ty(Ctx);

    ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.size());

    SmallVector<Constant *> Elements;
    for (GlobalVariable *GV : Variables) {
      auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
      if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
        auto *elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32);
        Elements.push_back(elt);
      } else {
        Elements.push_back(PoisonValue::get(I32));
      }
    }
    return ConstantArray::get(KernelOffsetsType, Elements);
  }

  // Build a constant table of i32 offsets indexed by [kernel id][variable
  // index], used by non-kernel functions to locate LDS variables at runtime.
  static GlobalVariable *buildLookupTable(
      Module &M, ArrayRef<GlobalVariable *> Variables,
      ArrayRef<Function *> kernels,
      DenseMap<Function *, LDSVariableReplacement> &KernelToReplacement) {
    if (Variables.empty()) {
      return nullptr;
    }
    LLVMContext &Ctx = M.getContext();

    const size_t NumberVariables = Variables.size();
    const size_t NumberKernels = kernels.size();

    ArrayType *KernelOffsetsType =
        ArrayType::get(Type::getInt32Ty(Ctx), NumberVariables);

    ArrayType *AllKernelsOffsetsType =
        ArrayType::get(KernelOffsetsType, NumberKernels);

    Constant *Missing = PoisonValue::get(KernelOffsetsType);
    std::vector<Constant *> overallConstantExprElts(NumberKernels);
    for (size_t i = 0; i < NumberKernels; i++) {
      auto Replacement = KernelToReplacement.find(kernels[i]);
      overallConstantExprElts[i] =
          (Replacement == KernelToReplacement.end())
              ? Missing
              : getAddressesOfVariablesInKernel(
                    Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
    }

    Constant *init =
        ConstantArray::get(AllKernelsOffsetsType, overallConstantExprElts);

    return new GlobalVariable(
        M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, init,
        "llvm.amdgcn.lds.offset.table", nullptr, GlobalValue::NotThreadLocal,
        AMDGPUAS::CONSTANT_ADDRESS);
  }

  void replaceUseWithTableLookup(Module &M, IRBuilder<> &Builder,
                                 GlobalVariable *LookupTable,
                                 GlobalVariable *GV, Use &U,
                                 Value *OptionalIndex) {
    // Table is indexed by the kernel id, optionally followed by the variable
    // index for the static lookup table.
    LLVMContext &Ctx = M.getContext();
    Type *I32 = Type::getInt32Ty(Ctx);
    auto *I = cast<Instruction>(U.getUser());

    Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());

    if (auto *Phi = dyn_cast<PHINode>(I)) {
      BasicBlock *BB = Phi->getIncomingBlock(U);
      Builder.SetInsertPoint(&(*(BB->getFirstInsertionPt())));
    } else {
      Builder.SetInsertPoint(I);
    }

    SmallVector<Value *, 3> GEPIdx = {
        ConstantInt::get(I32, 0),
        tableKernelIndex,
    };
    if (OptionalIndex)
      GEPIdx.push_back(OptionalIndex);

    Value *Address = Builder.CreateInBoundsGEP(
        LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());

    Value *loaded = Builder.CreateLoad(I32, Address);

    Value *replacement =
        Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());

    U.set(replacement);
  }

  void replaceUsesInInstructionsWithTableLookup(
      Module &M, ArrayRef<GlobalVariable *> ModuleScopeVariables,
      GlobalVariable *LookupTable) {
    LLVMContext &Ctx = M.getContext();
    IRBuilder<> Builder(Ctx);
    Type *I32 = Type::getInt32Ty(Ctx);

    for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {
      auto *GV = ModuleScopeVariables[Index];

      for (Use &U : make_early_inc_range(GV->uses())) {
        auto *I = dyn_cast<Instruction>(U.getUser());
        if (!I)
          continue;

        replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
                                  ConstantInt::get(I32, Index));
      }
    }
  }

  static DenseSet<Function *> kernelsThatIndirectlyAccessAnyOfPassedVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<GlobalVariable *> const &VariableSet) {
    DenseSet<Function *> KernelSet;

    if (VariableSet.empty())
      return KernelSet;

    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernelLDS(&Func))
        continue;
      for (GlobalVariable *GV : LDSUsesInfo.indirect_access[&Func]) {
        if (VariableSet.contains(GV)) {
          KernelSet.insert(&Func);
          break;
        }
      }
    }

    return KernelSet;
  }

  static GlobalVariable *
  chooseBestVariableForModuleStrategy(const DataLayout &DL,
                                      VariableFunctionMap &LDSVars) {
    // Find the variable that is most profitable to lower via the module
    // struct: reached indirectly from the most kernels, smaller is better.
    struct CandidateTy {
      GlobalVariable *GV = nullptr;
      size_t UserCount = 0;
      size_t Size = 0;

      CandidateTy() = default;

      CandidateTy(GlobalVariable *GV, uint64_t UserCount, uint64_t AllocSize)
          : GV(GV), UserCount(UserCount), Size(AllocSize) {}

      bool operator<(const CandidateTy &Other) const {
        // Fewer users makes module scope variable less desirable
        if (UserCount < Other.UserCount) {
          return true;
        }
        if (UserCount > Other.UserCount) {
          return false;
        }

        // Bigger makes module scope variable less desirable
        if (Size < Other.Size) {
          return false;
        }

        if (Size > Other.Size) {
          return true;
        }

        // Arbitrary but consistent tie break
        return GV->getName() < Other.GV->getName();
      }
    };

    CandidateTy MostUsed;

    for (auto &K : LDSVars) {
      GlobalVariable *GV = K.first;
      if (K.second.size() <= 1) {
        // A variable reachable by only one kernel is best lowered with the
        // kernel strategy
        continue;
      }
      CandidateTy Candidate(
          GV, K.second.size(),
          DL.getTypeAllocSize(GV->getValueType()).getFixedValue());
      if (MostUsed < Candidate)
        MostUsed = Candidate;
    }

    return MostUsed.GV;
  }

  static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
                                       uint32_t Address) {
    // Write the specified address into metadata where it can be retrieved by
    // the assembler. Format is a half open range, [Address Address+1)
    LLVMContext &Ctx = M->getContext();
    auto *IntTy =
        M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
    GV->setMetadata(LLVMContext::MD_absolute_symbol,
                    MDNode::get(Ctx, {MinC, MaxC}));
  }

  DenseMap<Function *, Value *> tableKernelIndexCache;
  Value *getTableLookupKernelIndex(Module &M, Function *F) {
    // Cache the kernel-id intrinsic call per function; not invalidated by the
    // potential addition of new kernels.
    auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);
    if (Inserted) {
      auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
      IRBuilder<> Builder(&*InsertAt);

      It->second =
          Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {}, {});
    }

    return It->second;
  }

  static std::vector<Function *> assignLDSKernelIDToEachKernel(
      Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS,
      DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS) {
    // Associate kernels in the set with an arbitrary but reproducible order
    // and annotate them with that order in metadata. This metadata is
    // recognised by the backend and lowered to a SGPR which can be read from
    // using amdgcn_lds_kernel_id.

    std::vector<Function *> OrderedKernels;
    if (!KernelsThatAllocateTableLDS.empty() ||
        !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

      for (Function &Func : M->functions()) {
        if (Func.isDeclaration())
          continue;
        if (!isKernelLDS(&Func))
          continue;

        if (KernelsThatAllocateTableLDS.contains(&Func) ||
            KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {
          assert(Func.hasName()); // else fatal error earlier
          OrderedKernels.push_back(&Func);
        }
      }

      // Put them in an arbitrary but reproducible order
      OrderedKernels = sortByName(std::move(OrderedKernels));

      // Annotate the kernels with their order in this vector
      LLVMContext &Ctx = M->getContext();
      IRBuilder<> Builder(Ctx);

      if (OrderedKernels.size() > UINT32_MAX) {
        // 32 bit keeps the id in a single SGPR
        report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels");
      }

      for (size_t i = 0; i < OrderedKernels.size(); i++) {
        Metadata *AttrMDArgs[1] = {
            ConstantAsMetadata::get(Builder.getInt32(i)),
        };
        OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",
                                       MDNode::get(Ctx, AttrMDArgs));
      }
    }
    return OrderedKernels;
  }

  static void partitionVariablesIntoIndirectStrategies(
      Module &M, LDSUsesInfoTy const &LDSUsesInfo,
      VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
      DenseSet<GlobalVariable *> &ModuleScopeVariables,
      DenseSet<GlobalVariable *> &TableLookupVariables,
      DenseSet<GlobalVariable *> &KernelAccessVariables,
      DenseSet<GlobalVariable *> &DynamicVariables) {

    GlobalVariable *HybridModuleRoot =
        LoweringKindLoc != LoweringKind::hybrid
            ? nullptr
            : chooseBestVariableForModuleStrategy(
                  M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);

    DenseSet<Function *> const EmptySet;
    DenseSet<Function *> const &HybridModuleRootKernels =
        HybridModuleRoot
            ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
            : EmptySet;

    for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
      // Each iteration of this loop assigns exactly one global variable to
      // exactly one of the implementation strategies.

      GlobalVariable *GV = K.first;
      assert(AMDGPU::isLDSVariableToLower(*GV));
      assert(K.second.size() != 0);

      if (AMDGPU::isDynamicLDS(*GV)) {
        DynamicVariables.insert(GV);
        continue;
      }

      switch (LoweringKindLoc) {
      case LoweringKind::module:
        ModuleScopeVariables.insert(GV);
        break;

      case LoweringKind::table:
        TableLookupVariables.insert(GV);
        break;

      case LoweringKind::kernel:
        if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);
        } else {
          report_fatal_error(
              "cannot lower LDS '" + GV->getName() +
              "' to kernel access as it is reachable from multiple kernels");
        }
        break;

      case LoweringKind::hybrid: {
        if (GV == HybridModuleRoot) {
          assert(K.second.size() != 1);
          ModuleScopeVariables.insert(GV);
        } else if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);
        } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
          ModuleScopeVariables.insert(GV);
        } else {
          TableLookupVariables.insert(GV);
        }
        break;
      }
      }
    }

    // All LDS variables accessed indirectly have now been partitioned into
    // the distinct lowering strategies.
    assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
               KernelAccessVariables.size() + DynamicVariables.size() ==
           LDSToKernelsThatNeedToAccessItIndirectly.size());
  }

  static GlobalVariable *lowerModuleScopeStructVariables(
      Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
      DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
    // Create a struct to hold the ModuleScopeVariables. Replace all uses of
    // those variables from non-kernel functions with the new struct instance.
    // Only kernels that allocate the module struct have their uses rewritten
    // to it; other kernels keep using their per-kernel instance.
    if (ModuleScopeVariables.empty()) {
      return nullptr;
    }

    LLVMContext &Ctx = M.getContext();

    LDSVariableReplacement ModuleScopeReplacement =
        createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                     ModuleScopeVariables);

    appendToCompilerUsed(M, {static_cast<GlobalValue *>(
                            ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                                cast<Constant>(ModuleScopeReplacement.SGV),
                                PointerType::getUnqual(Ctx)))});

    // module.lds will be allocated at zero in any kernel that allocates it
    recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);

    // historic
    removeLocalVarsFromUsedLists(M, ModuleScopeVariables);

    // Replace all uses of module scope variables from non-kernel functions
    replaceLDSVariablesWithStruct(
        M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
          Instruction *I = dyn_cast<Instruction>(U.getUser());
          if (!I) {
            return false;
          }
          Function *F = I->getFunction();
          return !isKernelLDS(F);
        });

    // Replace uses of module scope variables from kernel functions that
    // allocate the module scope struct, otherwise leave them unchanged.
    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernelLDS(&Func))
        continue;

      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
        replaceLDSVariablesWithStruct(
            M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
              Instruction *I = dyn_cast<Instruction>(U.getUser());
              if (!I) {
                return false;
              }
              Function *F = I->getFunction();
              return F == &Func;
            });

        markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
      }
    }

    return ModuleScopeReplacement.SGV;
  }

  static DenseMap<Function *, LDSVariableReplacement>
  lowerKernelScopeStructVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<GlobalVariable *> const &ModuleScopeVariables,
      DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
      GlobalVariable *MaybeModuleScopeStruct) {

    // Create a struct for each kernel for the non-module-scope variables.

    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
    for (Function &Func : M.functions()) {
      if (Func.isDeclaration() || !isKernelLDS(&Func))
        continue;

      DenseSet<GlobalVariable *> KernelUsedVariables;
      // Allocating variables that are used directly in this struct to get
      // alignment aware allocation and predictable frame size.
      for (auto &v : LDSUsesInfo.direct_access[&Func]) {
        if (!AMDGPU::isDynamicLDS(*v)) {
          KernelUsedVariables.insert(v);
        }
      }

      // Allocating variables that are accessed indirectly so that a lookup of
      // this struct instance can find them from nested functions.
      for (auto &v : LDSUsesInfo.indirect_access[&Func]) {
        if (!AMDGPU::isDynamicLDS(*v)) {
          KernelUsedVariables.insert(v);
        }
      }

      // Variables allocated in the module lds must all resolve to that struct,
      // not to the per-kernel instance.
      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
        for (GlobalVariable *v : ModuleScopeVariables) {
          KernelUsedVariables.erase(v);
        }
      }

      if (KernelUsedVariables.empty()) {
        // Either used no LDS, or the LDS it used was all in the module struct
        // or dynamically sized
        continue;
      }

      // The association between kernel function and LDS struct is done by
      // symbol name, which only works if the function in question has a name.
      if (!Func.hasName()) {
        report_fatal_error("Anonymous kernels cannot use LDS variables");
      }

      std::string VarName =
          (Twine("llvm.amdgcn.kernel.") + Func.getName() + ".lds").str();

      auto Replacement =
          createLDSVariableReplacement(M, VarName, KernelUsedVariables);

      // If any indirect uses, create a direct use to ensure allocation
      auto Accesses = LDSUsesInfo.indirect_access.find(&Func);
      if ((Accesses != LDSUsesInfo.indirect_access.end()) &&
          !Accesses->second.empty())
        markUsedByKernel(&Func, Replacement.SGV);

      // remove preserves existing codegen
      removeLocalVarsFromUsedLists(M, KernelUsedVariables);
      KernelToReplacement[&Func] = Replacement;

      // Rewrite uses within kernel to the new struct
      replaceLDSVariablesWithStruct(
          M, KernelUsedVariables, Replacement, [&Func](Use &U) {
            Instruction *I = dyn_cast<Instruction>(U.getUser());
            return I && I->getFunction() == &Func;
          });
    }
    return KernelToReplacement;
  }

  static GlobalVariable *
  buildRepresentativeDynamicLDSInstance(Module &M, LDSUsesInfoTy &LDSUsesInfo,
                                        Function *func) {
    // Create a dynamic lds variable associated with this kernel that has the
    // maximum alignment of any dynamic lds variable reachable from it. All
    // reachable dynamic lds variables are then lowered to this single
    // representative, which is assigned the absolute address of the start of
    // dynamic LDS for the kernel.

    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();
    Align MaxDynamicAlignment(1);

    auto UpdateMaxAlignment = [&MaxDynamicAlignment, &DL](GlobalVariable *GV) {
      if (AMDGPU::isDynamicLDS(*GV)) {
        MaxDynamicAlignment =
            std::max(MaxDynamicAlignment, AMDGPU::getAlign(DL, GV));
      }
    };

    for (GlobalVariable *GV : LDSUsesInfo.indirect_access[func]) {
      UpdateMaxAlignment(GV);
    }

    for (GlobalVariable *GV : LDSUsesInfo.direct_access[func]) {
      UpdateMaxAlignment(GV);
    }

    assert(func->hasName()); // Checked by caller
    auto *emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);
    GlobalVariable *N = new GlobalVariable(
        M, emptyCharArray, false, GlobalValue::ExternalLinkage, nullptr,
        Twine("llvm.amdgcn." + func->getName() + ".dynlds"), nullptr,
        GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false);
    N->setAlignment(MaxDynamicAlignment);

    assert(AMDGPU::isDynamicLDS(*N));
    return N;
  }

  DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
      DenseSet<GlobalVariable *> const &DynamicVariables,
      std::vector<Function *> const &OrderedKernels) {
    DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS;
    if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
      LLVMContext &Ctx = M.getContext();
      IRBuilder<> Builder(Ctx);
      Type *I32 = Type::getInt32Ty(Ctx);

      std::vector<Constant *> newDynamicLDS;

      // Table is built in the same order as OrderedKernels
      for (auto &func : OrderedKernels) {

        if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {
          assert(isKernelLDS(func));
          if (!func->hasName()) {
            report_fatal_error("Anonymous kernels cannot use LDS variables");
          }

          GlobalVariable *N =
              buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);

          KernelToCreatedDynamicLDS[func] = N;

          markUsedByKernel(func, N);

          auto *emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);
          auto *GEP = ConstantExpr::getGetElementPtr(
              emptyCharArray, N, ConstantInt::get(I32, 0), true);
          newDynamicLDS.push_back(ConstantExpr::getPtrToInt(GEP, I32));
        } else {
          newDynamicLDS.push_back(PoisonValue::get(I32));
        }
      }
      assert(OrderedKernels.size() == newDynamicLDS.size());

      ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
      Constant *init = ConstantArray::get(t, newDynamicLDS);
      GlobalVariable *table = new GlobalVariable(
          M, t, true, GlobalValue::InternalLinkage, init,
          "llvm.amdgcn.dynlds.offset.table", nullptr,
          GlobalValue::NotThreadLocal, AMDGPUAS::CONSTANT_ADDRESS);

      for (GlobalVariable *GV : DynamicVariables) {
        for (Use &U : make_early_inc_range(GV->uses())) {
          auto *I = dyn_cast<Instruction>(U.getUser());
          if (!I)
            continue;
          if (isKernelLDS(I->getFunction()))
            continue;

          replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);
        }
      }
    }
    return KernelToCreatedDynamicLDS;
  }

  static GlobalVariable *uniquifyGVPerKernel(Module &M, GlobalVariable *GV,
                                             Function *KF) {
    bool NeedsReplacement = false;
    for (Use &U : GV->uses()) {
      if (auto *I = dyn_cast<Instruction>(U.getUser())) {
        Function *F = I->getFunction();
        if (isKernelLDS(F) && F != KF) {
          NeedsReplacement = true;
          break;
        }
      }
    }
    if (!NeedsReplacement)
      return GV;

    // Create a new GV used only by this kernel and its functions
    GlobalVariable *NewGV = new GlobalVariable(
        M, GV->getValueType(), GV->isConstant(), GV->getLinkage(),
        GV->getInitializer(), GV->getName() + "." + KF->getName(), nullptr,
        GV->getThreadLocalMode(), GV->getType()->getAddressSpace());
    NewGV->copyAttributesFrom(GV);
    for (Use &U : make_early_inc_range(GV->uses())) {
      if (auto *I = dyn_cast<Instruction>(U.getUser())) {
        Function *F = I->getFunction();
        if (!isKernelLDS(F) || F == KF) {
          U.getUser()->replaceUsesOfWith(GV, NewGV);
        }
      }
    }
    return NewGV;
  }

  bool lowerSpecialLDSVariables(
      Module &M, LDSUsesInfoTy &LDSUsesInfo,
      VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
    bool Changed = false;
    // First round: give a module-absolute assignment to named-barrier LDS
    // variables that are accessed indirectly from more than one kernel.
    int NumAbsolutes = 0;
    std::vector<GlobalVariable *> OrderedGVs;
    for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
      GlobalVariable *GV = K.first;
      if (!isNamedBarrier(*GV))
        continue;
      // Give a module-absolute assignment if it is indirectly accessed by
      // multiple kernels. This is not precise, but we don't want to duplicate
      // a function when it is called by multiple kernels.
      if (LDSToKernelsThatNeedToAccessItIndirectly[GV].size() > 1) {
        OrderedGVs.push_back(GV);
      } else {
        // Leave it to the second round, which handles variables reachable
        // from a single kernel, by treating it as directly accessed there.
        LDSUsesInfo.direct_access[*K.second.begin()].insert(GV);
      }
      LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);
    }
    OrderedGVs = sortByName(std::move(OrderedGVs));
    for (GlobalVariable *GV : OrderedGVs) {
      int BarId = ++NumAbsolutes;
      unsigned BarrierScope = AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
      // 4 bits alignment, 5 bits barrier id, 3 bits barrier scope
      unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
      recordLDSAbsoluteAddress(&M, GV, Offset);
    }
    OrderedGVs.clear();

    // Second round: give a kernel-relative assignment to named-barrier
    // variables that are directly accessed by a kernel.
    std::vector<Function *> OrderedKernels;
    for (auto &K : LDSUsesInfo.direct_access) {
      Function *F = K.first;
      assert(isKernelLDS(F));
      OrderedKernels.push_back(F);
    }
    OrderedKernels = sortByName(std::move(OrderedKernels));

    DenseMap<Function *, uint32_t> Kernel2BarId;
    for (Function *F : OrderedKernels) {
      for (GlobalVariable *GV : LDSUsesInfo.direct_access[F]) {
        if (!isNamedBarrier(*GV))
          continue;
        // Skip variables that already received an absolute address in the
        // first round.
        if (GV->isAbsoluteSymbolRef()) {
          continue;
        }
        OrderedGVs.push_back(GV);
      }
      OrderedGVs = sortByName(std::move(OrderedGVs));
      for (GlobalVariable *GV : OrderedGVs) {
        // GV could also be used directly by other kernels. If so, create a
        // new GV used only by this kernel and its functions.
        auto NewGV = uniquifyGVPerKernel(M, GV, F);
        Changed |= (NewGV != GV);
        int BarId = (NumAbsolutes + 1);
        if (Kernel2BarId.find(F) != Kernel2BarId.end()) {
          BarId = (Kernel2BarId[F] + 1);
        }
        Kernel2BarId[F] = BarId;
        unsigned BarrierScope = AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
        unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
        recordLDSAbsoluteAddress(&M, NewGV, Offset);
      }
      OrderedGVs.clear();
    }

    // Also erase the named-barrier variables from the indirect access maps so
    // that later lowering stages do not see them again.
    for (auto &K : LDSUsesInfo.indirect_access) {
      assert(isKernelLDS(K.first));
      for (GlobalVariable *GV : K.second) {
        if (isNamedBarrier(*GV))
          K.second.erase(GV);
      }
    }
    return Changed;
  }

  bool runOnModule(Module &M) {
    CallGraph CG = CallGraph(M);
    bool Changed = superAlignLDSGlobals(M);

    Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);

    Changed = true; // todo: narrow this down

    // For each kernel, what variables does it access directly or through
    // callees
    LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);

    // For each variable accessed through callees, which kernels access it
    VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
    for (auto &K : LDSUsesInfo.indirect_access) {
      Function *F = K.first;
      assert(isKernelLDS(F));
      for (GlobalVariable *GV : K.second) {
        LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
      }
    }

    if (LDSUsesInfo.HasSpecialGVs) {
      // Special LDS variables need special address assignment
      Changed |= lowerSpecialLDSVariables(
          M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);
    }

    // Partition variables accessed indirectly into the different strategies
    DenseSet<GlobalVariable *> ModuleScopeVariables;
    DenseSet<GlobalVariable *> TableLookupVariables;
    DenseSet<GlobalVariable *> KernelAccessVariables;
    DenseSet<GlobalVariable *> DynamicVariables;
    partitionVariablesIntoIndirectStrategies(
        M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
        ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
        DynamicVariables);

    // If a kernel accesses a variable that is going to be stored in the
    // module instance through a call then that kernel needs to allocate the
    // module instance
    const DenseSet<Function *> KernelsThatAllocateModuleLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        ModuleScopeVariables);
    const DenseSet<Function *> KernelsThatAllocateTableLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        TableLookupVariables);

    const DenseSet<Function *> KernelsThatIndirectlyAllocateDynamicLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        DynamicVariables);

    GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
        M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);

    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
        lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
                                        KernelsThatAllocateModuleLDS,
                                        MaybeModuleScopeStruct);

    // Lower zero cost accesses to the kernel instances just created
    for (auto &GV : KernelAccessVariables) {
      auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
      assert(funcs.size() == 1); // Only one kernel can access it
      LDSVariableReplacement Replacement =
          KernelToReplacement[*(funcs.begin())];

      DenseSet<GlobalVariable *> Vec;
      Vec.insert(GV);

      replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {
        return isa<Instruction>(U.getUser());
      });
    }

    // The ith element of this vector is kernel id i
    std::vector<Function *> OrderedKernels =
        assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
                                      KernelsThatIndirectlyAllocateDynamicLDS);

    if (!KernelsThatAllocateTableLDS.empty()) {
      LLVMContext &Ctx = M.getContext();
      IRBuilder<> Builder(Ctx);

      // The order must be consistent between the lookup table and the
      // accesses to the lookup table
      auto TableLookupVariablesOrdered =
          sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),
                                                   TableLookupVariables.end()));

      GlobalVariable *LookupTable = buildLookupTable(
          M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
      replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
                                               LookupTable);
    }

    DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
        lowerDynamicLDSVariables(M, LDSUsesInfo,
                                 KernelsThatIndirectlyAllocateDynamicLDS,
                                 DynamicVariables, OrderedKernels);

    // Strip amdgpu-no-lds-kernel-id from everything reachable from these
    // kernels; the backend needs to pass the kernel id to them.
    for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
                            &KernelsThatAllocateTableLDS})
      for (Function *F : *KernelSet)
        removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});

    // All kernel frames have been allocated. Calculate and record the
    // addresses.
    {
      const DataLayout &DL = M.getDataLayout();

      for (Function &Func : M.functions()) {
        if (Func.isDeclaration() || !isKernelLDS(&Func))
          continue;

        // All three allocations are optional. The layout within a kernel's
        // LDS frame is:
        //   module.lds struct (at address zero)
        //   alignment padding
        //   kernel instance struct
        //   alignment padding
        //   dynamic lds variables

        const bool AllocateModuleScopeStruct =
            MaybeModuleScopeStruct &&
            KernelsThatAllocateModuleLDS.contains(&Func);

        auto Replacement = KernelToReplacement.find(&Func);
        const bool AllocateKernelScopeStruct =
            Replacement != KernelToReplacement.end();

        const bool AllocateDynamicVariable =
            KernelToCreatedDynamicLDS.contains(&Func);

        uint32_t Offset = 0;

        if (AllocateModuleScopeStruct) {
          // Allocated at zero, recorded once on construction, not once per
          // kernel
          Offset += DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
        }

        if (AllocateKernelScopeStruct) {
          GlobalVariable *KernelStruct = Replacement->second.SGV;
          Offset = alignTo(Offset, AMDGPU::getAlign(DL, KernelStruct));
          recordLDSAbsoluteAddress(&M, KernelStruct, Offset);
          Offset += DL.getTypeAllocSize(KernelStruct->getValueType());
        }

        // If there is dynamic allocation, the alignment needed is included in
        // the static frame size. There may be no reference to the dynamic
        // variable in the kernel itself, so without including it here, that
        // alignment padding could be missed.
        if (AllocateDynamicVariable) {
          GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
          Offset = alignTo(Offset, AMDGPU::getAlign(DL, DynamicVariable));
          recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
        }

        if (Offset != 0) {
          (void)TM;
          std::string Buffer;
          raw_string_ostream SS{Buffer};
          SS << format("%u", Offset);

          // Annotate kernels that allocate dynamic LDS with min-lds ==
          // max-lds, i.e. there is no more space available for static
          // variables on top of the static frame.
          if (AllocateDynamicVariable)
            SS << format(",%u", Offset);

          Func.addFnAttr("amdgpu-lds-size", Buffer);
        }
      }
    }

    // Remove the now-dead original LDS variables.
    for (auto &GV : make_early_inc_range(M.globals()))
      if (AMDGPU::isLDSVariableToLower(GV)) {
        GV.removeDeadConstantUsers();
        if (GV.use_empty())
          GV.eraseFromParent();
      }

    return Changed;
  }

private:
  // Increase the alignment of LDS globals if necessary to maximise the chance
  // that we can use aligned LDS instructions to access them.
  static bool superAlignLDSGlobals(Module &M) {
    const DataLayout &DL = M.getDataLayout();
    bool Changed = false;
    if (!SuperAlignLDSGlobals) {
      return Changed;
    }

    for (auto &GV : M.globals()) {
      if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
        // Only changing alignment of LDS variables
        continue;
      }
      if (!GV.hasInitializer()) {
        // cuda/hip extern __shared__ variable, leave alignment alone
        continue;
      }

      if (GV.isAbsoluteSymbolRef()) {
        // If the variable is already allocated, don't change the alignment
        continue;
      }

      Align Alignment = AMDGPU::getAlign(DL, &GV);
      TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());

      if (GVSize > 8) {
        // We might want to use a b96 or b128 load/store
        Alignment = std::max(Alignment, Align(16));
      } else if (GVSize > 4) {
        // We might want to use a b64 load/store
        Alignment = std::max(Alignment, Align(8));
      } else if (GVSize > 2) {
        // We might want to use a b32 load/store
        Alignment = std::max(Alignment, Align(4));
      } else if (GVSize > 1) {
        // We might want to use a b16 load/store
        Alignment = std::max(Alignment, Align(2));
      }

      if (Alignment != AMDGPU::getAlign(DL, &GV)) {
        Changed = true;
        GV.setAlignment(Alignment);
      }
    }
    return Changed;
  }

  static LDSVariableReplacement createLDSVariableReplacement(
      Module &M, std::string VarName,
      DenseSet<GlobalVariable *> const &LDSVarsToTransform) {
    // Create a struct instance containing LDSVarsToTransform and a map from
    // those variables to the corresponding constant GEP into the struct.
    // Padding variables may be introduced to meet alignment requirements;
    // they have no uses and are erased before returning.

    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();
    assert(!LDSVarsToTransform.empty());

    SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
    LayoutFields.reserve(LDSVarsToTransform.size());
    {
      // The order of fields in this struct depends on the order of variables
      // in the argument, which varies when changing how they are identified;
      // sort to get deterministic output.
      auto Sorted = sortByName(std::vector<GlobalVariable *>(
          LDSVarsToTransform.begin(), LDSVarsToTransform.end()));

      for (GlobalVariable *GV : Sorted) {
        OptimizedStructLayoutField F(GV,
                                     DL.getTypeAllocSize(GV->getValueType()),
                                     AMDGPU::getAlign(DL, GV));
        LayoutFields.emplace_back(F);
      }
    }

    performOptimizedStructLayout(LayoutFields);

    std::vector<GlobalVariable *> LocalVars;
    BitVector IsPaddingField;
    LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large
    IsPaddingField.reserve(LDSVarsToTransform.size());
    {
      uint64_t CurrentOffset = 0;
      for (auto &F : LayoutFields) {
        GlobalVariable *FGV =
            static_cast<GlobalVariable *>(const_cast<void *>(F.Id));
        Align DataAlign = F.Alignment;

        uint64_t DataAlignV = DataAlign.value();
        if (uint64_t Rem = CurrentOffset % DataAlignV) {
          uint64_t Padding = DataAlignV - Rem;

          // Append an array of padding bytes to meet the requested alignment
          Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
          LocalVars.push_back(new GlobalVariable(
              M, ATy, false, GlobalValue::InternalLinkage,
              PoisonValue::get(ATy), "", nullptr, GlobalValue::NotThreadLocal,
              AMDGPUAS::LOCAL_ADDRESS, false));
          IsPaddingField.push_back(true);
          CurrentOffset += Padding;
        }

        LocalVars.push_back(FGV);
        IsPaddingField.push_back(false);
        CurrentOffset += F.Size;
      }
    }

    std::vector<Type *> LocalVarTypes;
    LocalVarTypes.reserve(LocalVars.size());
    std::transform(
        LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
        [](const GlobalVariable *V) -> Type * { return V->getValueType(); });

    StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");

    Align StructAlign = AMDGPU::getAlign(DL, LocalVars[0]);

    GlobalVariable *SGV = new GlobalVariable(
        M, LDSTy, false, GlobalValue::InternalLinkage, PoisonValue::get(LDSTy),
        VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
        false);
    SGV->setAlignment(StructAlign);

    DenseMap<GlobalVariable *, Constant *> Map;
    Type *I32 = Type::getInt32Ty(Ctx);
    for (size_t I = 0; I < LocalVars.size(); I++) {
      GlobalVariable *GV = LocalVars[I];
      Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
      Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx, true);
      if (IsPaddingField[I]) {
        assert(GV->use_empty());
        GV->eraseFromParent();
      } else {
        Map[GV] = GEP;
      }
    }
    assert(Map.size() == LDSVarsToTransform.size());
    return {SGV, std::move(Map)};
  }

  template <typename PredicateTy>
  static void replaceLDSVariablesWithStruct(
      Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
      const LDSVariableReplacement &Replacement, PredicateTy Predicate) {
    LLVMContext &Ctx = M.getContext();
    const DataLayout &DL = M.getDataLayout();

    // Insert the aliasing info in a predictable order so test output is
    // stable.
    auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
        LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));

    // Create alias.scope and their lists. Each field in the new structure
    // does not alias with all other fields.
    SmallVector<MDNode *> AliasScopes;
    SmallVector<Metadata *> NoAliasList;
    const size_t NumberVars = LDSVarsToTransform.size();
    if (NumberVars > 1) {
      MDBuilder MDB(Ctx);
      AliasScopes.reserve(NumberVars);
      MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
      for (size_t I = 0; I < NumberVars; I++) {
        MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
        AliasScopes.push_back(Scope);
      }
      NoAliasList.append(&AliasScopes[1], AliasScopes.end());
    }

    // Replace uses of ith variable with a constantexpr to the corresponding
    // field of the instance that will be allocated by AMDGPUMachineFunction
    for (size_t I = 0; I < NumberVars; I++) {
      GlobalVariable *GV = LDSVarsToTransform[I];
      Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);

      GV->replaceUsesWithIf(GEP, Predicate);

      APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);
      uint64_t Offset = APOff.getZExtValue();

      Align A =
          commonAlignment(Replacement.SGV->getAlign().valueOrOne(), Offset);

      if (I)
        NoAliasList[I - 1] = AliasScopes[I - 1];
      MDNode *NoAlias =
          NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
      MDNode *AliasScope =
          AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});

      refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
    }
  }

  static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
                                       const DataLayout &DL, MDNode *AliasScope,
                                       MDNode *NoAlias, unsigned MaxDepth = 5) {
    if (!MaxDepth || (A == 1 && !AliasScope))
      return;

    for (User *U : Ptr->users()) {
      if (auto *I = dyn_cast<Instruction>(U)) {
        if (AliasScope && I->mayReadOrWriteMemory()) {
          MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
          AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
                   : AliasScope);
          I->setMetadata(LLVMContext::MD_alias_scope, AS);

          MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
          NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
          I->setMetadata(LLVMContext::MD_noalias, NA);
        }
      }

      if (auto *LI = dyn_cast<LoadInst>(U)) {
        LI->setAlignment(std::max(A, LI->getAlign()));
        continue;
      }
      if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getPointerOperand() == Ptr)
          SI->setAlignment(std::max(A, SI->getAlign()));
        continue;
      }
      if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
        // None of atomicrmw operations can work on pointers, but let's check
        // it anyway in case it will or we will process ConstantExpr.
        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));
        continue;
      }
      if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));
        continue;
      }
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
        APInt Off(BitWidth, 0);
        if (GEP->getPointerOperand() == Ptr) {
          Align GA;
          if (GEP->accumulateConstantOffset(DL, Off))
            GA = commonAlignment(A, Off.getLimitedValue());
          refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
                                   MaxDepth - 1);
        }
        continue;
      }
      if (auto *I = dyn_cast<Instruction>(U)) {
        if (I->getOpcode() == Instruction::BitCast ||
            I->getOpcode() == Instruction::AddrSpaceCast)
          refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
      }
    }
  }
};

class AMDGPULowerModuleLDSLegacy : public ModulePass {
public:
  const AMDGPUTargetMachine *TM;
  static char ID;

  AMDGPULowerModuleLDSLegacy(const AMDGPUTargetMachine *TM_ = nullptr)
      : ModulePass(ID), TM(TM_) {
    initializeAMDGPULowerModuleLDSLegacyPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    if (!TM)
      AU.addRequired<TargetPassConfig>();
  }

  bool runOnModule(Module &M) override {
    if (!TM) {
      auto &TPC = getAnalysis<TargetPassConfig>();
      TM = &TPC.getTM<AMDGPUTargetMachine>();
    }

    return AMDGPULowerModuleLDS(*TM).runOnModule(M);
  }
};

} // namespace

char AMDGPULowerModuleLDSLegacy::ID = 0;

char &llvm::AMDGPULowerModuleLDSLegacyPassID = AMDGPULowerModuleLDSLegacy::ID;

INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                      "Lower uses of LDS variables from non-kernel functions",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                    "Lower uses of LDS variables from non-kernel functions",
                    false, false)

ModulePass *
llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
  return new AMDGPULowerModuleLDSLegacy(TM);
}

PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
                                                ModuleAnalysisManager &) {
  return AMDGPULowerModuleLDS(TM).runOnModule(M) ? PreservedAnalyses::none()
                                                 : PreservedAnalyses::all();
}