LLVM: lib/Target/AMDGPU/AMDGPUCallLowering.cpp Source File
25#include "llvm/IR/IntrinsicsAMDGPU.h"
26
27#define DEBUG_TYPE "amdgpu-call-lowering"
28
29using namespace llvm;
30
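// GlobalISel value handlers for the AMDGPU calling conventions. They move
// argument and return values between virtual registers and the physical
// registers or stack slots chosen by the CCAssignFn for the convention.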
31namespace {
32
33
37
38
40 }
41
43}
44
48 : OutgoingValueHandler(B, MRI), MIB(MIB) {}
49
51
56 }
57
62 }
63
66 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
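// Values returned in SGPRs must be uniform. Insert a readfirstlane so the
// physreg copy below gets a scalar source; 32-bit pointers and vectors are
// converted to s32 first since the intrinsic is built on s32 here.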
67
68
69
70
71 const SIRegisterInfo *TRI
72 = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
73 if (TRI->isSGPRReg(MRI, PhysReg)) {
74 LLT Ty = MRI.getType(ExtReg);
75 LLT S32 = LLT::scalar(32);
76 if (Ty != S32) {
77
78
79 assert(Ty.getSizeInBits() == 32);
80 if (Ty.isPointer())
81 ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
82 else
83 ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
84 }
85
86 auto ToSGPR = MIRBuilder
87 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
88 {MRI.getType(ExtReg)})
89 .addReg(ExtReg);
90 ExtReg = ToSGPR.getReg(0);
91 }
92
93 MIRBuilder.buildCopy(PhysReg, ExtReg);
95 }
96};
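// Handles values coming into the current function: formal arguments and the
// results of calls. Stack-passed values are accessed through fixed frame
// objects; values narrower than 32 bits that arrive in 32-bit registers are
// copied as s32, extended according to the CCValAssign, then truncated.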
97
100
102 : IncomingValueHandler(B, MRI) {}
103
107 auto &MFI = MIRBuilder.getMF().getFrameInfo();
108
109
110
111 const bool IsImmutable = !Flags.isByVal();
112 int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
114 auto AddrReg = MIRBuilder.buildFrameIndex(
116 StackUsed = std::max(StackUsed, Size + Offset);
117 return AddrReg.getReg(0);
118 }
119
122 markPhysRegUsed(PhysReg);
123
125
126
127 auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
128
129
130
131 auto Extended =
132 buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
133 MIRBuilder.buildTrunc(ValVReg, Extended);
134 return;
135 }
136
137 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
138 }
139
144
148 MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
149 }
150
151
152
153
154 virtual void markPhysRegUsed(unsigned PhysReg) = 0;
155};
156
157struct FormalArgHandler : public AMDGPUIncomingArgHandler {
159 : AMDGPUIncomingArgHandler(B, MRI) {}
160
161 void markPhysRegUsed(unsigned PhysReg) override {
162 MIRBuilder.getMBB().addLiveIn(PhysReg);
163 }
164};
165
166struct CallReturnHandler : public AMDGPUIncomingArgHandler {
169 : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
170
171 void markPhysRegUsed(unsigned PhysReg) override {
173 }
174
176};
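// Handles outgoing call arguments. Stack arguments are addressed relative to
// the stack pointer (a wave address when flat scratch is disabled); for tail
// calls they are written into fixed frame objects offset by FPDiff.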
177
178struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
179
180
181 int FPDiff;
182
183
185
186 bool IsTailCall;
187
190 bool IsTailCall = false, int FPDiff = 0)
191 : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
192 IsTailCall(IsTailCall) {}
193
200
201 if (IsTailCall) {
206 return FIReg.getReg(0);
207 }
208
210
213 if (ST.enableFlatScratch()) {
214
217 } else {
218
219
220
221 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
223 }
224 }
225
227
228 auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
230 return AddrReg.getReg(0);
231 }
232
239
243 MIRBuilder.buildStore(ValVReg, Addr, *MMO);
244 }
245
247 unsigned ValRegIndex, Register Addr, LLT MemTy,
251 ? extendRegister(Arg.Regs[ValRegIndex], VA)
252 : Arg.Regs[ValRegIndex];
253 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
254 }
255};
256}
257
261
262
264 switch (MIOpc) {
265 case TargetOpcode::G_SEXT:
266 return ISD::SIGN_EXTEND;
267 case TargetOpcode::G_ZEXT:
268 return ISD::ZERO_EXTEND;
269 case TargetOpcode::G_ANYEXT:
270 return ISD::ANY_EXTEND;
271 default:
272 llvm_unreachable("not an extend opcode");
273 }
274}
275
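// Decide whether the return value can be lowered to registers for this
// calling convention by checking the outgoing values against the return
// CCAssignFn; otherwise the return is demoted to an sret pointer.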
276bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
279 bool IsVarArg) const {
280
282 return true;
283
286 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
288
289 return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
290}
291
292
293
297 if (!Val)
298 return true;
299
300 auto &MF = B.getMF();
303 MachineRegisterInfo *MRI = B.getMRI();
304 LLVMContext &Ctx = F.getContext();
305
308
312 "For each split Type there should be exactly one VReg.");
313
315
316 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
317 EVT VT = SplitEVTs[i];
320 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
321
323 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
324 if (RetInfo.Flags[0].isSExt()) {
325 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
326 ExtendOp = TargetOpcode::G_SEXT;
327 } else if (RetInfo.Flags[0].isZExt()) {
328 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
329 ExtendOp = TargetOpcode::G_ZEXT;
330 }
331
332 EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
334 if (ExtVT != VT) {
337 Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
338 }
339 }
340
341 if (Reg != RetInfo.Regs[0]) {
342 RetInfo.Regs[0] = Reg;
343
344 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
345 }
346
348 }
349
350 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
351
353 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
355 CC, F.isVarArg());
356}
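// lowerReturn: functions that end the wave emit S_ENDPGM directly. Otherwise
// the return value is copied into the convention's return registers and one
// of SI_RETURN_TO_EPILOG (shaders), G_AMDGPU_WHOLE_WAVE_FUNC_RETURN (whole
// wave functions, which take the original EXEC as an extra operand), or
// SI_RETURN is emitted.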
357
361
365
366 assert(!Val == VRegs.empty() && "Return value without a vreg");
367
368 CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
370 const bool IsWaveEnd =
372 if (IsWaveEnd) {
373 B.buildInstr(AMDGPU::S_ENDPGM)
374 .addImm(0);
375 return true;
376 }
377
379 unsigned ReturnOpc = IsWholeWave ? AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN
380 : IsShader ? AMDGPU::SI_RETURN_TO_EPILOG
381 : AMDGPU::SI_RETURN;
382 auto Ret = B.buildInstrNoInsert(ReturnOpc);
383
386 else if (!lowerReturnVal(B, Val, VRegs, Ret))
387 return false;
388
389 if (IsWholeWave)
390 addOriginalExecToReturn(B.getMF(), Ret);
391
392
393
394 B.insertInstr(Ret);
395 return true;
396}
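// Compute the address of a kernel argument: offset the preloaded kernarg
// segment pointer by the argument's byte offset.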
397
405 Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
406
408
409 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
410}
411
414 Align Alignment) const {
420
422
425 splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
426
427 unsigned Idx = 0;
428 for (ArgInfo &SplitArg : SplitArgs) {
429 Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
430 lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
431
433 if (SplitArg.Flags[0].isPointer()) {
434
435 LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
438 : PtrTy;
439 }
440
442 PtrInfo,
446
447 assert(SplitArg.Regs.size() == 1);
448
449 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
450 ++Idx;
451 }
452}
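// allocateHSAUserSGPRs: claim the user SGPRs this kernel was compiled to
// receive (private segment buffer, dispatch pointer, queue pointer, kernarg
// segment pointer, dispatch ID, flat scratch init, private segment size) and
// mark them live-in, in the ABI-defined order.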
453
454
460
463 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
464 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
465 CCInfo.AllocateReg(PrivateSegmentBufferReg);
466 }
467
470 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
472 }
473
476 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
478 }
479
484 Register VReg = MRI.createGenericVirtualRegister(P4);
485 MRI.addLiveIn(InputPtrReg, VReg);
486 B.getMBB().addLiveIn(InputPtrReg);
487 B.buildCopy(VReg, InputPtrReg);
489 }
490
493 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
495 }
496
498 Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
499 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
501 }
502
504 Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
505 MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
506 CCInfo.AllocateReg(PrivateSegmentSizeReg);
507 }
508
509
510
511}
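// lowerFormalArgumentsKernel: kernel arguments are not passed in registers;
// each one is loaded from the kernarg segment at its ABI offset. byref
// arguments only produce a pointer into the segment (with an address-space
// cast if the byref address space differs from the constant address space).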
512
523
525 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
526
528
529 unsigned i = 0;
530 const Align KernArgBaseAlign(16);
532 uint64_t ExplicitArgOffset = 0;
533
534
535 for (auto &Arg : F.args()) {
536
537 if (Arg.hasAttribute("amdgpu-hidden-argument")) {
538 LLVM_DEBUG(dbgs() << "Preloading hidden arguments is not supported\n");
539 return false;
540 }
541
542 const bool IsByRef = Arg.hasByRefAttr();
543 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
544 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
545 if (AllocSize == 0)
546 continue;
547
548 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
549 Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
550
551 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
552 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
553
554 if (Arg.use_empty()) {
555 ++i;
556 continue;
557 }
558
560
561 if (IsByRef) {
562 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
563
565 "expected only one register for byval pointers");
567 lowerParameterPtr(VRegs[i][0], B, ArgOffset);
568 } else {
570 Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
571 lowerParameterPtr(PtrReg, B, ArgOffset);
572
573 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
574 }
575 } else {
576 ArgInfo OrigArg(VRegs[i], Arg, i);
577 const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
579 lowerParameter(B, OrigArg, ArgOffset, Alignment);
580 }
581
582 ++i;
583 }
584
585 if (Info->getNumKernargPreloadedSGPRs())
586 Info->setNumWaveDispatchSGPRs(Info->getNumUserSGPRs());
587
588 TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
589 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
590 return true;
591}
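// lowerFormalArguments: non-kernel functions. Pixel shaders get PS input
// bookkeeping (unused inputs are skipped, and at least one input must stay
// enabled), whole wave functions receive the original EXEC via a setup
// instruction as their first argument, and entry functions also allocate the
// special input VGPRs and system SGPRs.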
592
597
598
599
600
603
606
614
616 CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
618
620 Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
621 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
622 CCInfo.AllocateReg(ImplicitBufferPtrReg);
623 }
624
625
627 Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
628 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
630 }
631
633 unsigned Idx = 0;
634 unsigned PSInputNum = 0;
635
636
637
640
641 for (auto &Arg : F.args()) {
642 if (DL.getTypeStoreSize(Arg.getType()) == 0)
643 continue;
644
645 if (Info->isWholeWaveFunction() && Idx == 0) {
646 assert(VRegs[Idx].size() == 1 && "Expected only one register");
647
648
649 B.buildInstr(AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
650 .addDef(VRegs[Idx][0]);
651
652 ++Idx;
653 continue;
654 }
655
656 const bool InReg = Arg.hasAttribute(Attribute::InReg);
657
658 if (Arg.hasAttribute(Attribute::SwiftSelf) ||
659 Arg.hasAttribute(Attribute::SwiftError) ||
660 Arg.hasAttribute(Attribute::Nest))
661 return false;
662
664 const bool ArgUsed = !Arg.use_empty();
665 bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
666
667 if (!SkipArg) {
668 Info->markPSInputAllocated(PSInputNum);
669 if (ArgUsed)
670 Info->markPSInputEnabled(PSInputNum);
671 }
672
673 ++PSInputNum;
674
675 if (SkipArg) {
676 for (Register R : VRegs[Idx])
677 B.buildUndef(R);
678
679 ++Idx;
680 continue;
681 }
682 }
683
684 ArgInfo OrigArg(VRegs[Idx], Arg, Idx);
685 const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
687
689 ++Idx;
690 }
691
692
693
694
695
696
697
698
699
700
701
702
703
704
706 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
707 ((Info->getPSInputAddr() & 0xF) == 0 &&
708 Info->isPSInputAllocated(11))) {
711 Info->markPSInputAllocated(0);
712 Info->markPSInputEnabled(0);
713 }
714
715 if (Subtarget.isAmdPalOS()) {
716
717
718
719
720
721
722
723
724 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
725 if ((PsInputBits & 0x7F) == 0 ||
726 ((PsInputBits & 0xF) == 0 &&
727 (PsInputBits >> 11 & 1)))
728 Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
729 }
730 }
731
733 CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
734
735 if (!MBB.empty())
736 B.setInstr(*MBB.begin());
737
738 if (!IsEntryFunc && !IsGraphics) {
739
740 TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
741
742 if (!Subtarget.enableFlatScratch())
743 CCInfo.AllocateReg(Info->getScratchRSrcReg());
744 TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
745 }
746
749 return false;
750
751 if (IsEntryFunc) {
752
753
754 Info->setNumWaveDispatchSGPRs(
756 Info->setNumWaveDispatchVGPRs(
758 }
759
762 return false;
763
765
766
767 if (IsEntryFunc)
768 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
769
770
771
772
773
774 Info->setBytesInStackArgArea(StackSize);
775
776
778
779 return true;
780}
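// passSpecialInputs: forward the caller's implicit inputs (dispatch pointer,
// queue pointer, implicitarg pointer, dispatch ID, workgroup IDs, LDS kernel
// ID and the packed workitem IDs) to the callee, unless the call site has the
// matching amdgpu-no-* attribute saying the callee does not need them.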
781
784 SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
787
788
789
790 if (!Info.CB)
791 return true;
792
795
798
799
800
801
802
812 };
813
814 static constexpr StringLiteral ImplicitAttrNames[][2] = {
815 {"amdgpu-no-dispatch-ptr", ""},
816 {"amdgpu-no-queue-ptr", ""},
817 {"amdgpu-no-implicitarg-ptr", ""},
818 {"amdgpu-no-dispatch-id", ""},
819 {"amdgpu-no-workgroup-id-x", "amdgpu-no-cluster-id-x"},
820 {"amdgpu-no-workgroup-id-y", "amdgpu-no-cluster-id-y"},
821 {"amdgpu-no-workgroup-id-z", "amdgpu-no-cluster-id-z"},
822 {"amdgpu-no-lds-kernel-id", ""},
823 };
824
826
830
831 unsigned I = 0;
832 for (auto InputID : InputRegs) {
835 LLT ArgTy;
836
837
839 return AttrName.empty() || Info.CB->hasFnAttr(AttrName);
840 }))
841 continue;
842
843 std::tie(OutgoingArg, ArgRC, ArgTy) =
845 if (!OutgoingArg)
846 continue;
847
850 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
851 CallerArgInfo.getPreloadedValue(InputID);
852 assert(IncomingArgRC == ArgRC);
853
854 Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
855
856 if (IncomingArg) {
857 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
859 LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
861 std::optional<uint32_t> Id =
863 if (Id) {
865 } else {
867 }
868 } else {
869
870
872 }
873
875 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
878 } else {
879 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
880 return false;
881 }
882 }
883
884
885
888 LLT ArgTy;
889
890 std::tie(OutgoingArg, ArgRC, ArgTy) =
892 if (!OutgoingArg)
893 std::tie(OutgoingArg, ArgRC, ArgTy) =
895 if (!OutgoingArg)
896 std::tie(OutgoingArg, ArgRC, ArgTy) =
898 if (!OutgoingArg)
899 return false;
900
901 auto WorkitemIDX =
903 auto WorkitemIDY =
905 auto WorkitemIDZ =
907
908 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
909 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
910 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
912
913 const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
914 const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
915 const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
916
917
918
920 if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
921 NeedWorkItemIDX) {
922 if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
923 InputReg = MRI.createGenericVirtualRegister(S32);
924 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArgX,
925 std::get<1>(WorkitemIDX),
926 std::get<2>(WorkitemIDX));
927 } else {
929 }
930 }
931
932 if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
933 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
935 LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY,
936 std::get<1>(WorkitemIDY), std::get<2>(WorkitemIDY));
937
939 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
940 }
941
942 if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
943 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
945 LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ,
946 std::get<1>(WorkitemIDZ), std::get<2>(WorkitemIDZ));
947
949 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
950 }
951
952 if (!InputReg &&
953 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
954 InputReg = MRI.createGenericVirtualRegister(S32);
955 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
956
957
958
959
961 } else {
962
963
965 IncomingArgX ? *IncomingArgX :
966 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
967 LI->buildLoadInputValue(InputReg, MIRBuilder, &IncomingArg,
968 &AMDGPU::VGPR_32RegClass, S32);
969 }
970 }
971
973 if (InputReg)
974 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
975
978 } else {
979 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
980 return false;
981 }
982
983 return true;
984}
985
986
987
988static std::pair<CCAssignFn *, CCAssignFn *>
992
994 bool IsTailCall, bool IsWave32,
996 bool IsDynamicVGPRChainCall = false) {
997
999 "Indirect calls can't be tail calls, "
1000 "because the address can be divergent");
1001 if (!IsTailCall)
1002 return AMDGPU::G_SI_CALL;
1003
1005 if (IsDynamicVGPRChainCall)
1006 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR
1007 : AMDGPU::SI_CS_CHAIN_TC_W64_DVGPR;
1008 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
1009 }
1010
1013 return AMDGPU::SI_TCRETURN_GFX_WholeWave;
1014
1016 return AMDGPU::SI_TCRETURN_GFX;
1017
1018 return AMDGPU::SI_TCRETURN;
1019}
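// Add the callee operand for a call. Register callees are used directly;
// direct calls to a GlobalValue are emitted through a G_GLOBAL_VALUE pointer,
// and dynamic-VGPR chain calls are marked specially.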
1020
1021
1025 bool IsDynamicVGPRChainCall = false) {
1026 if (Info.Callee.isReg()) {
1029 } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
1030
1031
1035 CallInst.addReg(Ptr.getReg(0));
1036
1037 if (IsDynamicVGPRChainCall) {
1038
1040 } else
1042 } else
1043 return false;
1044
1045 return true;
1046}
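// Tail-call predicate: the caller and callee must agree on how arguments are
// passed, i.e. identical conventions or ones whose register assignments and
// preserved-register masks are compatible.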
1047
1054
1055
1056 if (CalleeCC == CallerCC)
1057 return true;
1058
1060
1061
1062 const auto *TRI = ST.getRegisterInfo();
1063
1064 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1065 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1066 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1067 return false;
1068
1069
1073 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1075
1078 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1080
1081
1082
1084 CalleeAssignFnVarArg);
1086 CallerAssignFnVarArg);
1087 return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1088}
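// Check that the outgoing arguments fit in the caller's own stack argument
// area and do not clobber arguments that live in callee-saved registers.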
1089
1093
1094 if (OutArgs.empty())
1095 return true;
1096
1101
1104 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1105
1106
1108 CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
1110
1112 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
1113 return false;
1114 }
1115
1116
1119 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
1120 return false;
1121 }
1122
1123
1126 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
1129}
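// A call may be turned into a tail call only when the callee is a symbol,
// the convention allows TCO, no byval or swifterror arguments are involved,
// the two functions pass arguments the same way, and the outgoing arguments
// fit the caller's stack argument area.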
1130
1134
1135 if (!Info.IsTailCall)
1136 return false;
1137
1138
1139
1140 if (Info.Callee.isReg())
1141 return false;
1142
1147
1149 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1150
1151
1152 if (!CallerPreserved)
1153 return false;
1154
1156 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1157 return false;
1158 }
1159
1161 return A.hasByValAttr() || A.hasSwiftErrorAttr();
1162 })) {
1163 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
1164 "or swifterror arguments\n");
1165 return false;
1166 }
1167
1168
1172 }
1173
1174
1175
1179 << "... Caller and callee have incompatible calling conventions.\n");
1180 return false;
1181 }
1182
1183
1184
1185
1187 return false;
1188
1189 LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
1190 return true;
1191}
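// handleImplicitCallArguments: copy the scratch resource descriptor to the
// register the callee expects (when flat scratch is disabled) and copy each
// forwarded implicit input into its outgoing physical register.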
1192
1193
1194
1195
1200 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
1201 if (!ST.enableFlatScratch()) {
1202
1203
1206
1208 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
1209 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
1210
1211 MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
1213 }
1214
1215 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1218 }
1219}
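// Indices into CallLoweringInfo::OrigArgs for the operands of an
// @llvm.amdgcn.cs.chain call: the EXEC mask, the flags word, and the
// dynamic-VGPR extras (VGPR count, fallback EXEC, fallback callee).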
1220
1221namespace {
1222
1223
1224enum ChainCallArgIdx {
1225 Exec = 1,
1226 Flags = 4,
1227 NumVGPRs = 5,
1228 FallbackExec = 6,
1229 FallbackCallee = 7,
1230};
1231}
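// lowerTailCall handles both ordinary tail calls and amdgcn.cs.chain calls.
// For chain calls the EXEC mask and, in dynamic-VGPR mode, the VGPR count,
// fallback EXEC and fallback callee become explicit operands of the chain
// pseudo-instruction.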
1232
1244
1245
1247
1248
1252 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1253
1255 if (!IsSibCall)
1256 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1257
1259 bool IsDynamicVGPRChainCall = false;
1260
1261 if (IsChainCall) {
1262 ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];
1264 if (FlagsValue.isZero()) {
1265 if (Info.OrigArgs.size() != 5) {
1266 LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0\n");
1267 return false;
1268 }
1270 IsDynamicVGPRChainCall = true;
1271
1272 if (Info.OrigArgs.size() != 8) {
1273 LLVM_DEBUG(dbgs() << "Expected 3 additional args\n");
1274 return false;
1275 }
1276
1277
1278 if (!ST.isWave32()) {
1280 F, "dynamic VGPR mode is only supported for wave32"));
1281 return false;
1282 }
1283
1284 ArgInfo FallbackExecArg = Info.OrigArgs[ChainCallArgIdx::FallbackExec];
1285 assert(FallbackExecArg.Regs.size() == 1 &&
1286 "Expected single register for fallback EXEC");
1287 if (!FallbackExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {
1288 LLVM_DEBUG(dbgs() << "Bad type for fallback EXEC\n");
1289 return false;
1290 }
1291 }
1292 }
1293
1294 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true,
1295 ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);
1297
1298 if (FuncInfo->isWholeWaveFunction())
1299 addOriginalExecToReturn(MF, MIB);
1300
1301
1302 unsigned CalleeIdx = MIB->getNumOperands();
1303
1305 return false;
1306
1307
1308
1309 MIB.addImm(0);
1310
1311
1312
1313 if (IsChainCall) {
1314 auto AddRegOrImm = [&](const ArgInfo &Arg) {
1316 MIB.addImm(CI->getSExtValue());
1317 } else {
1318 MIB.addReg(Arg.Regs[0]);
1319 unsigned Idx = MIB->getNumOperands() - 1;
1321 MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
1322 MIB->getOperand(Idx), Idx));
1323 }
1324 };
1325
1326 ArgInfo ExecArg = Info.OrigArgs[ChainCallArgIdx::Exec];
1327 assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
1328
1329 if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {
1331 return false;
1332 }
1333
1334 AddRegOrImm(ExecArg);
1335 if (IsDynamicVGPRChainCall)
1336 std::for_each(Info.OrigArgs.begin() + ChainCallArgIdx::NumVGPRs,
1337 Info.OrigArgs.end(), AddRegOrImm);
1338 }
1339
1340
1341 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1342 MIB.addRegMask(Mask);
1343
1344
1345
1346
1347
1348
1349 int FPDiff = 0;
1350
1351
1352
1353
1354 unsigned NumBytes = 0;
1355 if (!IsSibCall) {
1356
1357
1358
1359 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1361 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1362
1363
1366 return false;
1367
1368
1369
1371
1372
1373
1374
1375 FPDiff = NumReusableBytes - NumBytes;
1376
1377
1378
1379
1380
1381
1383 "unaligned stack on tail call");
1384 }
1385
1387 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1388
1389
1390
1391
1393
1397
1398 if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1399 return false;
1400 }
1401
1403
1405 return false;
1406
1407
1408 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
1409 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1410 return false;
1411
1412 if (Info.ConvergenceCtrlToken) {
1414 }
1416 ImplicitArgRegs);
1417
1418
1419
1420 if (!IsSibCall) {
1421 MIB->getOperand(CalleeIdx + 1).setImm(FPDiff);
1423
1424
1425
1426
1428 }
1429
1430
1432
1433
1434
1435 if (MIB->getOpcode() == AMDGPU::SI_TCRETURN_GFX_WholeWave) {
1436 MIB->getOperand(0).setReg(
1438 *MIB, MIB->getDesc(), MIB->getOperand(0), 0));
1439 }
1440
1441
1442
1443
1444
1445
1446
1447 if (MIB->getOperand(CalleeIdx).isReg()) {
1449 MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
1450 MIB->getOperand(CalleeIdx), CalleeIdx));
1451 }
1452
1454 Info.LoweredTailCall = true;
1455 return true;
1456}
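// Lower @llvm.amdgcn.cs.chain: rewrite it as a tail call to the chain callee
// with the SGPR arguments marked inreg and the VGPR arguments not, then reuse
// the tail-call path above.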
1457
1458
1461 ArgInfo Callee = Info.OrigArgs[0];
1462 ArgInfo SGPRArgs = Info.OrigArgs[2];
1463 ArgInfo VGPRArgs = Info.OrigArgs[3];
1464
1468
1469
1470
1471 const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
1474 Info.CallConv = F->getCallingConv();
1475 } else {
1476 assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
1479
1480 }
1481
1482
1483 Info.IsVarArg = false;
1484
1487 "SGPR arguments should be marked inreg");
1490 "VGPR arguments should not be marked inreg");
1491
1495
1496 Info.IsMustTailCall = true;
1497 return lowerTailCall(MIRBuilder, Info, OutArgs);
1498}
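// lowerCall: amdgcn.cs.chain is dispatched to lowerChainCall, and
// amdgcn.call.whole.wave takes its real callee from the first operand and
// drops it from the argument list. Otherwise the arguments are split per the
// calling convention, a tail call is attempted, and failing that a G_SI_CALL
// sequence bracketed by ADJCALLSTACKUP/ADJCALLSTACKDOWN is emitted.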
1499
1502 if (Function *F = Info.CB->getCalledFunction())
1503 if (F->isIntrinsic()) {
1504 switch (F->getIntrinsicID()) {
1505 case Intrinsic::amdgcn_cs_chain:
1507 case Intrinsic::amdgcn_call_whole_wave:
1509
1510
1511
1514 Info.OrigArgs.erase(Info.OrigArgs.begin());
1515 Info.IsVarArg = false;
1516 break;
1517 default:
1519 }
1520 }
1521
1522 if (Info.IsVarArg) {
1523 LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
1524 return false;
1525 }
1526
1530
1535
1537 for (auto &OrigArg : Info.OrigArgs)
1539
1541 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1543
1544
1545 bool CanTailCallOpt =
1547
1548
1549 if (Info.IsMustTailCall && !CanTailCallOpt) {
1550 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1551 return false;
1552 }
1553
1554 Info.IsTailCall = CanTailCallOpt;
1555 if (CanTailCallOpt)
1556 return lowerTailCall(MIRBuilder, Info, OutArgs);
1557
1558
1561 std::tie(AssignFnFixed, AssignFnVarArg) =
1563
1564 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1567
1568
1569
1570 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),
1571 Info.CallConv);
1572
1574 MIB.addDef(TRI->getReturnAddressReg(MF));
1575
1576 if (!Info.IsConvergent)
1578
1580 return false;
1581
1582
1583 const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1584 MIB.addRegMask(Mask);
1585
1587 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1588
1589
1590
1591
1593
1596
1597 if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
1598 return false;
1599 }
1600
1601
1604 return false;
1605
1606 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
1607 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1608 return false;
1609
1611
1612 if (Info.ConvergenceCtrlToken) {
1614 }
1616 ImplicitArgRegs);
1617
1618
1620
1621
1622
1623
1624
1625
1626
1627 if (MIB->getOperand(1).isReg()) {
1629 MF, *TRI, MRI, *ST.getInstrInfo(),
1630 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1631 1));
1632 }
1633
1634
1636
1637
1638
1639
1640 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1641 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
1642 Info.IsVarArg);
1644 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1646 Info.CallConv, Info.IsVarArg))
1647 return false;
1648 }
1649
1650 uint64_t CalleePopBytes = NumBytes;
1651
1652 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1654 .addImm(CalleePopBytes);
1655
1656 if (!Info.CanLowerReturn) {
1657 insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1658 Info.DemoteRegister, Info.DemoteStackIndex);
1659 }
1660
1661 return true;
1662}
1663
1664void AMDGPUCallLowering::addOriginalExecToReturn(
1668 const MachineInstr *Setup = TII->getWholeWaveFunctionSetup(MF);
1669 Ret.addReg(Setup->getOperand(0).getReg());
1670}