LLVM: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
47
48#define DEBUG_TYPE "amdgpu-promote-alloca"
49
50using namespace llvm;
51
52namespace {
53
55 DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
56 cl::desc("Disable promote alloca to vector"),
58
60 DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
61 cl::desc("Disable promote alloca to LDS"),
63
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc("Maximum byte size to consider promote alloca to vector"),
68
70 "amdgpu-promote-alloca-to-vector-max-regs",
72 "Maximum vector size (in 32b registers) to use when promoting alloca"),
74
75
76
78 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
79 cl::desc("Ratio of VGPRs to budget for promoting alloca to vectors"),
81
83 LoopUserWeight("promote-alloca-vector-loop-user-weight",
84 cl::desc("The bonus weight of users of allocas within loop "
85 "when sorting profitable allocas"),
87
88
89class AMDGPUPromoteAllocaImpl {
90private:
95
96
98 uint32_t CurrentLocalMemUsage = 0;
99 unsigned MaxVGPRs;
100 unsigned VGPRBudgetRatio;
101 unsigned MaxVectorRegs;
102
103 bool IsAMDGCN = false;
104 bool IsAMDHSA = false;
105
106 std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
108
109
110
111 bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
112 std::vector<Value *> &WorkList) const;
113
114
115
116
117
118 bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
120 int OpIdx1) const;
121
122
123 bool hasSufficientLocalMem(const Function &F);
124
126 bool tryPromoteAllocaToVector(AllocaInst &I);
127 bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);
128
130
131 void setFunctionLimits(const Function &F);
132
133public:
135
137 IsAMDGCN = TT.isAMDGCN();
139 }
140
141 bool run(Function &F, bool PromoteToLDS);
142};
143
144
145class AMDGPUPromoteAlloca : public FunctionPass {
146public:
147 static char ID;
148
150
152 if (skipFunction(F))
153 return false;
154 if (auto *TPC = getAnalysisIfAvailable())
155 return AMDGPUPromoteAllocaImpl(
157 getAnalysis().getLoopInfo())
158 .run(F, true);
159 return false;
160 }
161
162 StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
163
164 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169};
170
171static unsigned getMaxVGPRs(unsigned LDSBytes, const TargetMachine &TM,
174 return 128;
175
177
179
180
181 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
182 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
183
184 unsigned MaxVGPRs = ST.getMaxNumVGPRs(
185 ST.getWavesPerEU(ST.getFlatWorkGroupSizes(F), LDSBytes, F).first,
186 DynamicVGPRBlockSize);
187
188
189
190
191 if (.hasFnAttribute(Attribute::AlwaysInline) &&
193 MaxVGPRs = std::min(MaxVGPRs, 32u);
194 return MaxVGPRs;
195}
196
197}
198
199char AMDGPUPromoteAlloca::ID = 0;
200
202 "AMDGPU promote alloca to vector or LDS", false, false)
203
204
208 "AMDGPU promote alloca to vector or LDS", false, false)
209
211
215 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, true);
219 return PA;
220 }
222}
223
227 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, false);
231 return PA;
232 }
234}
235
237 return new AMDGPUPromoteAlloca();
238}
239
243 while (!WorkList.empty()) {
244 auto *Cur = WorkList.pop_back_val();
245 for (auto &U : Cur->uses()) {
246 Uses.push_back(&U);
247
250 }
251 }
252}
253
254void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
257
258 for (auto *Alloca : Allocas) {
259 LLVM_DEBUG(dbgs() << "Scoring: " << *Alloca << "\n");
260 unsigned &Score = Scores[Alloca];
261
264 for (auto *U : Uses) {
267 continue;
268 unsigned UserScore =
269 1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
270 LLVM_DEBUG(dbgs() << " [+" << UserScore << "]:\t" << *Inst << "\n");
271 Score += UserScore;
272 }
273 LLVM_DEBUG(dbgs() << " => Final Score:" << Score << "\n");
274 }
275
277 return Scores.at(A) > Scores.at(B);
278 });
279
280
282 dbgs() << "Sorted Worklist:\n";
283 for (auto *A: Allocas)
284 dbgs() << " " << *A << "\n";
285 );
286
287}
288
289void AMDGPUPromoteAllocaImpl::setFunctionLimits(const Function &F) {
290
291
292
293 const int R600MaxVectorRegs = 16;
294 MaxVectorRegs = F.getFnAttributeAsParsedInteger(
295 "amdgpu-promote-alloca-to-vector-max-regs",
296 IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
297 if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
298 MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
299 VGPRBudgetRatio = F.getFnAttributeAsParsedInteger(
300 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
301 PromoteAllocaToVectorVGPRRatio);
302 if (PromoteAllocaToVectorVGPRRatio.getNumOccurrences())
303 VGPRBudgetRatio = PromoteAllocaToVectorVGPRRatio;
304}
305
306bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
308 DL = &Mod->getDataLayout();
309
311 if (.isPromoteAllocaEnabled())
312 return false;
313
314 bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(F);
315 MaxVGPRs = getMaxVGPRs(CurrentLocalMemUsage, TM, F);
316 setFunctionLimits(F);
317
318 unsigned VectorizationBudget =
319 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
320 : (MaxVGPRs * 32)) /
321 VGPRBudgetRatio;
322
326
327
328 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
329 continue;
331 }
332 }
333
334 sortAllocasToPromote(Allocas);
335
338 const unsigned AllocaCost = DL->getTypeSizeInBits(AI->getAllocatedType());
339
340 if (AllocaCost <= VectorizationBudget) {
341
342
343 if (tryPromoteAllocaToVector(*AI)) {
345 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
346 "Underflow!");
347 VectorizationBudget -= AllocaCost;
348 LLVM_DEBUG(dbgs() << " Remaining vectorization budget:"
349 << VectorizationBudget << "\n");
350 continue;
351 }
352 } else {
353 LLVM_DEBUG(dbgs() << "Alloca too big for vectorization (size:"
354 << AllocaCost << ", budget:" << VectorizationBudget
355 << "): " << *AI << "\n");
356 }
357
358 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
360 }
361
362
363
364
365
367}
368
373
374
375
376
380
381
382
383
384
385
387 return I->getOperand(0) == AI &&
389}
390
392 Value *Ptr, const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
394 if ()
396
397 auto I = GEPIdx.find(GEP);
398 assert(I != GEPIdx.end() && "Must have entry for GEP!");
399
400 Value *IndexValue = I->second;
401 assert(IndexValue && "index value missing from GEP index map");
402 return IndexValue;
403}
404
408
409
411 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
413 APInt ConstOffset(BW, 0);
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
434 if (!CurGEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
435 return nullptr;
436
437
438 CurPtr = CurGEP->getPointerOperand();
439 }
440
441 assert(CurPtr == Alloca && "GEP not based on alloca");
442
443 int64_t VecElemSize = DL.getTypeAllocSize(VecElemTy);
444 if (VarOffsets.size() > 1)
445 return nullptr;
446
448 int64_t Rem;
449 APInt::sdivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
450 if (Rem != 0)
451 return nullptr;
452 if (VarOffsets.size() == 0)
453 return ConstantInt::get(Ctx, IndexQuot);
454
456
457 const auto &VarOffset = VarOffsets.front();
458 APInt OffsetQuot;
459 APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
460 if (Rem != 0 || OffsetQuot.isZero())
461 return nullptr;
462
465 return nullptr;
466
467 Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
468 if (Offset != VarOffset.first)
470
471 if (!OffsetQuot.isOne()) {
473 Offset = Builder.CreateMul(Offset, ConstMul);
476 }
477 if (ConstOffset.isZero())
479
481 Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
484 return IndexAdd;
485}
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
504 unsigned VecStoreSize, unsigned ElementSize,
506 std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
508
509
512 Builder.SetInsertPoint(Inst);
513
514 const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
516 assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
517 const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
518 if (!PtrTy->isVectorTy())
519 return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
521
522
523 assert((Size % NumPtrElts == 0) && "Vector size not divisble");
525 return Builder.CreateBitOrPointerCast(
527 };
528
530
532 case Instruction::Load: {
533 Value *CurVal = GetCurVal();
536
537
539 TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
541 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
543 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
545 CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
546 Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
548 return nullptr;
549 }
550 }
551
552
555 const unsigned NumLoadedElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
557 assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
558
560 for (unsigned K = 0; K < NumLoadedElts; ++K) {
562 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
563 SubVec = Builder.CreateInsertElement(
564 SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
565 }
566
568 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
569 else if (SubVecTy->isPtrOrPtrVectorTy())
570 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
571
572 SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
574 return nullptr;
575 }
576
577
578 Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
579 if (AccessTy != VecEltTy)
580 ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
581
583 return nullptr;
584 }
585 case Instruction::Store: {
586
587
588
589
592 Value *Val = SI->getValueOperand();
593
594
596 TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
598 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
600 Val = CreateTempPtrIntCast(Val, AccessTy);
602 Val = CreateTempPtrIntCast(Val, VectorTy);
603 return Builder.CreateBitOrPointerCast(Val, VectorTy);
604 }
605 }
606
607
610 const unsigned NumWrittenElts =
611 AccessSize / DL.getTypeStoreSize(VecEltTy);
612 const unsigned NumVecElts = VectorTy->getNumElements();
614 assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
615
616 if (SubVecTy->isPtrOrPtrVectorTy())
617 Val = CreateTempPtrIntCast(Val, SubVecTy);
619 Val = CreateTempPtrIntCast(Val, AccessTy);
620
621 Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
622
623 Value *CurVec = GetCurVal();
624 for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
625 K < NumElts; ++K) {
627 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
628 CurVec = Builder.CreateInsertElement(
629 CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
630 }
631 return CurVec;
632 }
633
634 if (Val->getType() != VecEltTy)
635 Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
636 return Builder.CreateInsertElement(GetCurVal(), Val, Index);
637 }
638 case Instruction::Call: {
640
642 unsigned NumCopied = Length->getZExtValue() / ElementSize;
646
648 for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
649 if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
651 ? SrcBegin++
653 } else {
654 Mask.push_back(Idx);
655 }
656 }
657
658 return Builder.CreateShuffleVector(GetCurVal(), Mask);
659 }
660
662
663
664 Value *Elt = MSI->getOperand(1);
665 const unsigned BytesPerElt = DL.getTypeStoreSize(VecEltTy);
666 if (BytesPerElt > 1) {
667 Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
668
669
670
672 Type *PtrInt = Builder.getIntNTy(BytesPerElt * 8);
673 Elt = Builder.CreateBitCast(EltBytes, PtrInt);
674 Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
675 } else
676 Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
677 }
678
679 return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
680 }
681
683 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
684 Intr->replaceAllUsesWith(
685 Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
686 DL.getTypeAllocSize(VectorTy)));
687 return nullptr;
688 }
689 }
690
691 llvm_unreachable("Unsupported call when promoting alloca to vector");
692 }
693
694 default:
695 llvm_unreachable("Inconsistency in instructions promotable to vector");
696 }
697
698 llvm_unreachable("Did not return after promoting instruction!");
699}
700
703
704
705
706
707
708
709
710
711
712
713
715 TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
716
717
718
719 if (AccTS * 8 != DL.getTypeSizeInBits(AccessTy))
720 return false;
723 }
724
727}
728
729
730
731template
734
735
736
740
743 auto &BlockUses = UsesByBlock[BB];
744
745
746 if (BlockUses.empty())
747 continue;
748
749
750 if (BlockUses.size() == 1) {
752 continue;
753 }
754
755
757 if (!BlockUses.contains(&Inst))
758 continue;
759
760 Fn(&Inst);
761 }
762
763
764 BlockUses.clear();
765 }
766}
767
768
769
776
778AMDGPUPromoteAllocaImpl::getVectorTypeForAlloca(Type *AllocaTy) const {
779 if (DisablePromoteAllocaToVector) {
780 LLVM_DEBUG(dbgs() << " Promote alloca to vectors is disabled\n");
781 return nullptr;
782 }
783
786 uint64_t NumElems = 1;
787 Type *ElemTy;
788 do {
789 NumElems *= ArrayTy->getNumElements();
790 ElemTy = ArrayTy->getElementType();
792
793
795 if (InnerVectorTy) {
796 NumElems *= InnerVectorTy->getNumElements();
797 ElemTy = InnerVectorTy->getElementType();
798 }
799
801 unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
802 if (ElementSize > 0) {
803 unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
804
805
806
807
808 if (NumElems * ElementSize != AllocaSize)
809 NumElems = AllocaSize / ElementSize;
810 if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
812 }
813 }
814 }
815 if (!VectorTy) {
816 LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
817 return nullptr;
818 }
819
820 const unsigned MaxElements =
821 (MaxVectorRegs * 32) / DL->getTypeSizeInBits(VectorTy->getElementType());
822
826 << " has an unsupported number of elements\n");
827 return nullptr;
828 }
829
831 unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
832 if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
833 LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
834 "does not match the type's size\n");
835 return nullptr;
836 }
837
838 return VectorTy;
839}
840
841
842bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
843 LLVM_DEBUG(dbgs() << "Trying to promote to vectors: " << Alloca << '\n');
844
846 FixedVectorType *VectorTy = getVectorTypeForAlloca(AllocaTy);
847 if (!VectorTy)
848 return false;
849
850 std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
856
858 LLVM_DEBUG(dbgs() << " Cannot promote alloca to vector: " << Msg << "\n"
859 << " " << *Inst << "\n");
860 for (auto *Inst : reverse(NewGEPInsts))
862 return false;
863 };
864
867
868 LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
869
871 unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
872 assert(ElementSize > 0);
873 for (auto *U : Uses) {
875
877
880 return RejectUser(Inst, "pointer is being stored");
881
884 return RejectUser(Inst, "unsupported load/store as aggregate");
886
887
890 if (!IsSimple)
891 return RejectUser(Inst, "not a simple load or store");
892
893 Ptr = Ptr->stripPointerCasts();
894
895
896 if (Ptr == &Alloca && DL->getTypeStoreSize(Alloca.getAllocatedType()) ==
897 DL->getTypeStoreSize(AccessTy)) {
899 continue;
900 }
901
903 return RejectUser(Inst, "not a supported access type");
904
906 continue;
907 }
908
910
911
913 if (!Index)
914 return RejectUser(Inst, "cannot compute vector index for GEP");
915
918 continue;
919 }
920
924 continue;
925 }
926
928 if (TransferInst->isVolatile())
929 return RejectUser(Inst, "mem transfer inst is volatile");
930
932 if (!Len || (Len->getZExtValue() % ElementSize))
933 return RejectUser(Inst, "mem transfer inst length is non-constant or "
934 "not a multiple of the vector element size");
935
936 if (TransferInfo.try_emplace(TransferInst).second) {
939 }
940
941 auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
943 if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
944 return nullptr;
945
947 };
948
949 unsigned OpNum = U->getOperandNo();
950 MemTransferInfo *TI = &TransferInfo[TransferInst];
951 if (OpNum == 0) {
952 Value *Dest = TransferInst->getDest();
954 if (!Index)
955 return RejectUser(Inst, "could not calculate constant dest index");
957 } else {
959 Value *Src = TransferInst->getSource();
961 if (!Index)
962 return RejectUser(Inst, "could not calculate constant src index");
964 }
965 continue;
966 }
967
969 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
971 continue;
972 }
973 }
974
975
978 return RejectUser(Inst, "assume-like intrinsic cannot have any users");
980 continue;
981 }
982
984 return isAssumeLikeIntrinsic(cast(U));
985 })) {
987 continue;
988 }
989
990 return RejectUser(Inst, "unhandled alloca user");
991 }
992
993 while (!DeferredInsts.empty()) {
996
997
998 MemTransferInfo &Info = TransferInfo[TransferInst];
999 if (.SrcIndex ||
.DestIndex)
1000 return RejectUser(
1001 Inst, "mem transfer inst is missing constant src and/or dst index");
1002 }
1003
1004 LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
1005 << *VectorTy << '\n');
1006 const unsigned VecStoreSize = DL->getTypeStoreSize(VectorTy);
1007
1008
1009
1011 Updater.Initialize(VectorTy, "promotealloca");
1012
1016
1017 Value *AllocaInitValue =
1019 AllocaInitValue->takeName(&Alloca);
1020
1022
1023
1024
1025
1026
1030 auto GetCurVal = [&]() -> Value * {
1032 return CurVal;
1033
1034 if (!Placeholders.empty() && Placeholders.back()->getParent() == BB)
1035 return Placeholders.back();
1036
1037
1038
1042 Placeholders.push_back(Placeholder);
1043 return Placeholders.back();
1044 };
1045
1048 TransferInfo, GEPVectorIdx, GetCurVal);
1049 if (Result)
1051 });
1052
1053
1054 for (Instruction *Placeholder : Placeholders) {
1055 Placeholder->replaceAllUsesWith(
1057 Placeholder->eraseFromParent();
1058 }
1059
1060
1061
1065 I->eraseFromParent();
1066 }
1067
1068
1070 I->dropDroppableUses();
1072 I->eraseFromParent();
1073 }
1074
1075
1078 return true;
1079}
1080
1081std::pair<Value *, Value *>
1082AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
1085
1086 if (!IsAMDHSA) {
1088 Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_y, {});
1090 Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_z, {});
1091
1092 ST.makeLIDRangeMetadata(LocalSizeY);
1093 ST.makeLIDRangeMetadata(LocalSizeZ);
1094
1095 return std::pair(LocalSizeY, LocalSizeZ);
1096 }
1097
1098
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1133 Builder.CreateIntrinsic(Intrinsic::amdgcn_dispatch_ptr, {});
1134 DispatchPtr->addRetAttr(Attribute::NoAlias);
1135 DispatchPtr->addRetAttr(Attribute::NonNull);
1136 F.removeFnAttr("amdgpu-no-dispatch-ptr");
1137
1138
1140
1142
1143
1144
1145
1148
1151
1153 LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
1154 LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
1155 ST.makeLIDRangeMetadata(LoadZU);
1156
1157
1159
1160 return std::pair(Y, LoadZU);
1161}
1162
1163Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
1164 unsigned N) {
1169
1170 switch (N) {
1171 case 0:
1172 IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1174 AttrName = "amdgpu-no-workitem-id-x";
1175 break;
1176 case 1:
1177 IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1179 AttrName = "amdgpu-no-workitem-id-y";
1180 break;
1181
1182 case 2:
1183 IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1185 AttrName = "amdgpu-no-workitem-id-z";
1186 break;
1187 default:
1189 }
1190
1193 ST.makeLIDRangeMetadata(CI);
1194 F->removeFnAttr(AttrName);
1195
1196 return CI;
1197}
1198
1201 if ()
1202 return false;
1203
1204 switch (II->getIntrinsicID()) {
1205 case Intrinsic::memcpy:
1206 case Intrinsic::memmove:
1207 case Intrinsic::memset:
1208 case Intrinsic::lifetime_start:
1209 case Intrinsic::lifetime_end:
1210 case Intrinsic::invariant_start:
1211 case Intrinsic::invariant_end:
1212 case Intrinsic::launder_invariant_group:
1213 case Intrinsic::strip_invariant_group:
1214 case Intrinsic::objectsize:
1215 return true;
1216 default:
1217 return false;
1218 }
1219}
1220
1221bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1223 int OpIdx1) const {
1224
1226 if (Val == OtherOp)
1228
1230 return true;
1231
1232
1235 return false;
1236
1237
1238
1239
1240
1241
1242
1243 if (OtherObj != BaseAlloca) {
1245 dbgs() << "Found a binary instruction with another alloca object\n");
1246 return false;
1247 }
1248
1249 return true;
1250}
1251
1252bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1253 Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {
1254
1257 continue;
1258
1261 return false;
1262
1263 WorkList.push_back(User);
1264 continue;
1265 }
1266
1268 if (UseInst->getOpcode() == Instruction::PtrToInt)
1269 return false;
1270
1272 if (LI->isVolatile())
1273 return false;
1274 continue;
1275 }
1276
1278 if (SI->isVolatile())
1279 return false;
1280
1281
1282 if (SI->getPointerOperand() != Val)
1283 return false;
1284 continue;
1285 }
1286
1288 if (RMW->isVolatile())
1289 return false;
1290 continue;
1291 }
1292
1294 if (CAS->isVolatile())
1295 return false;
1296 continue;
1297 }
1298
1299
1300
1302 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1303 return false;
1304
1305
1306 WorkList.push_back(ICmp);
1307 continue;
1308 }
1309
1311
1312
1313 if (->isInBounds())
1314 return false;
1316
1317
1318 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
1319 return false;
1321
1322
1323
1324
1325 switch (Phi->getNumIncomingValues()) {
1326 case 1:
1327 break;
1328 case 2:
1329 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1330 return false;
1331 break;
1332 default:
1333 return false;
1334 }
1336
1337
1338
1339
1340
1341
1342
1343 return false;
1344 }
1345
1346 WorkList.push_back(User);
1347 if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
1348 return false;
1349 }
1350
1351 return true;
1352}
1353
1354bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
1355
1358
1359
1360
1361
1362 for (Type *ParamTy : FTy->params()) {
1365 LocalMemLimit = 0;
1366 LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
1367 "local memory disabled.\n");
1368 return false;
1369 }
1370 }
1371
1372 LocalMemLimit = ST.getAddressableLocalMemorySize();
1373 if (LocalMemLimit == 0)
1374 return false;
1375
1379
1381 for (const User *U : Val->users()) {
1383 if (Use->getFunction() == &F)
1384 return true;
1385 } else {
1387 if (VisitedConstants.insert(C).second)
1389 }
1390 }
1391
1392 return false;
1393 };
1394
1397 continue;
1398
1399 if (visitUsers(&GV, &GV)) {
1400 UsedLDS.insert(&GV);
1402 continue;
1403 }
1404
1405
1406
1407 while (.empty()) {
1409 if (visitUsers(&GV, C)) {
1410 UsedLDS.insert(&GV);
1412 break;
1413 }
1414 }
1415 }
1416
1420
1422 Align Alignment =
1424 uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());
1425
1426
1427
1429 LocalMemLimit = 0;
1430 LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
1431 "local memory. Promoting to local memory "
1432 "disabled.\n");
1433 return false;
1434 }
1435
1436 AllocatedSizes.emplace_back(AllocSize, Alignment);
1437 }
1438
1439
1440
1441
1442
1444
1445
1446 CurrentLocalMemUsage = 0;
1447
1448
1449
1450
1451
1452 for (auto Alloc : AllocatedSizes) {
1453 CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alloc.second);
1454 CurrentLocalMemUsage += Alloc.first;
1455 }
1456
1457 unsigned MaxOccupancy =
1458 ST.getWavesPerEU(ST.getFlatWorkGroupSizes(F), CurrentLocalMemUsage, F)
1459 .second;
1460
1461
1462 unsigned MaxSizeWithWaveCount =
1463 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
1464
1465
1466 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1467 return false;
1468
1469 LocalMemLimit = MaxSizeWithWaveCount;
1470
1471 LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
1472 << " bytes of LDS\n"
1473 << " Rounding size to " << MaxSizeWithWaveCount
1474 << " with a maximum occupancy of " << MaxOccupancy << '\n'
1475 << " and " << (LocalMemLimit - CurrentLocalMemUsage)
1476 << " available for promotion\n");
1477
1478 return true;
1479}
1480
1481
1482bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
1483 bool SufficientLDS) {
1484 LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');
1485
1486 if (DisablePromoteAllocaToLDS) {
1487 LLVM_DEBUG(dbgs() << " Promote alloca to LDS is disabled\n");
1488 return false;
1489 }
1490
1493
1496
1497
1498
1499
1500 switch (CC) {
1503 break;
1504 default:
1507 << " promote alloca to LDS not supported with calling convention.\n");
1508 return false;
1509 }
1510
1511
1512 if (!SufficientLDS)
1513 return false;
1514
1516 unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
1517
1518 Align Alignment =
1519 DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());
1520
1521
1522
1523
1524
1525
1526
1527 uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
1528 uint32_t AllocSize =
1529 WorkGroupSize * DL.getTypeAllocSize(I.getAllocatedType());
1530 NewSize += AllocSize;
1531
1532 if (NewSize > LocalMemLimit) {
1534 << " bytes of local memory not available to promote\n");
1535 return false;
1536 }
1537
1538 CurrentLocalMemUsage = NewSize;
1539
1540 std::vector<Value *> WorkList;
1541
1542 if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
1543 LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
1544 return false;
1545 }
1546
1547 LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");
1548
1550
1554 Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
1558
1559 Value *TCntY, *TCntZ;
1560
1561 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1562 Value *TIdX = getWorkitemID(Builder, 0);
1563 Value *TIdY = getWorkitemID(Builder, 1);
1564 Value *TIdZ = getWorkitemID(Builder, 2);
1565
1566 Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
1567 Tmp0 = Builder.CreateMul(Tmp0, TIdX);
1568 Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
1570 TID = Builder.CreateAdd(TID, TIdZ);
1571
1574
1577 I.replaceAllUsesWith(Offset);
1578 I.eraseFromParent();
1579
1581
1583
1584 for (Value *V : WorkList) {
1586 if () {
1590
1594
1597
1598 continue;
1599 }
1600
1601
1602
1604 continue;
1605
1606 assert(V->getType()->isPtrOrPtrVectorTy());
1607
1608 Type *NewTy = V->getType()->getWithNewType(NewPtrTy);
1609 V->mutateType(NewTy);
1610
1611
1615
1619 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
1621 Phi->getIncomingValue(I)))
1623 }
1624 }
1625
1626 continue;
1627 }
1628
1632 case Intrinsic::lifetime_start:
1633 case Intrinsic::lifetime_end:
1634
1636 continue;
1637 case Intrinsic::memcpy:
1638 case Intrinsic::memmove:
1639
1640
1641
1643 continue;
1644 case Intrinsic::memset: {
1650 continue;
1651 }
1652 case Intrinsic::invariant_start:
1653 case Intrinsic::invariant_end:
1654 case Intrinsic::launder_invariant_group:
1655 case Intrinsic::strip_invariant_group: {
1657 if (Intr->getIntrinsicID() == Intrinsic::invariant_start) {
1659 } else if (Intr->getIntrinsicID() == Intrinsic::invariant_end) {
1662 }
1671 continue;
1672 }
1673 case Intrinsic::objectsize: {
1675
1677 Intrinsic::objectsize,
1682 continue;
1683 }
1684 default:
1686 llvm_unreachable("Don't know how to promote alloca intrinsic use.");
1687 }
1688 }
1689
1693 assert(ID == Intrinsic::memcpy || ID == Intrinsic::memmove);
1694
1697 ID, MI->getRawDest(), MI->getDestAlign(), MI->getRawSource(),
1698 MI->getSourceAlign(), MI->getLength(), MI->isVolatile());
1699
1700 for (unsigned I = 0; I != 2; ++I) {
1702 B->addDereferenceableParamAttr(I, Bytes);
1703 }
1704 }
1705
1707 }
1708
1709 return true;
1710}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool isOne() const
Determine if this is a value of 1.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Class to represent function types.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
const Function & getFunction() const
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memset to the specified pointer and the specified value.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Triple - Helper class for working with autoconf configuration names.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
Definition AMDGPUPromoteAlloca.cpp:236
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
char & AMDGPUPromoteAllocaID
Definition AMDGPUPromoteAlloca.cpp:210
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
ConstantInt * SrcIndex
Definition AMDGPUPromoteAlloca.cpp:370
ConstantInt * DestIndex
Definition AMDGPUPromoteAlloca.cpp:371
This struct is a compact representation of a valid (non-zero power of two) alignment.
A MapVector that performs no allocations if smaller than a certain size.
Function object to check whether the second component of a container supported by std::get (like std:...