LLVM: lib/Target/AMDGPU/AMDGPUCallLowering.cpp Source File
1//===-- AMDGPUCallLowering.cpp - Call lowering for GlobalISel ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12//
13//===----------------------------------------------------------------------===//
14
24#include "llvm/IR/IntrinsicsAMDGPU.h"
25
26#define DEBUG_TYPE "amdgpu-call-lowering"
27
28using namespace llvm;
29
30namespace {
31
32
36
37
39 }
40
42}
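// AMDGPUOutgoingValueHandler (below) moves outgoing return values into the
// physical registers chosen by the calling convention. When the destination
// is an SGPR, the value is first normalized to a 32-bit scalar (the
// ptrtoint/bitcast below) and wrapped in an amdgcn_readfirstlane intrinsic so
// that a uniform value is copied into the scalar register.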
43
47 : OutgoingValueHandler(B, MRI), MIB(MIB) {}
48
50
55 }
56
61 }
62
65 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
66
67
68
69
70 const SIRegisterInfo *TRI
71 = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
72 if (TRI->isSGPRReg(MRI, PhysReg)) {
73 LLT Ty = MRI.getType(ExtReg);
74 LLT S32 = LLT::scalar(32);
75 if (Ty != S32) {
76 // The readfirstlane below operates on a 32-bit scalar, so convert
77 // pointers with ptrtoint and bitcast any other 32-bit value to s32.
78
79 if (Ty.isPointer())
80 ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);
81 else
82 ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
83 }
84
85 auto ToSGPR = MIRBuilder
86 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
87 {MRI.getType(ExtReg)})
88 .addReg(ExtReg);
89 ExtReg = ToSGPR.getReg(0);
90 }
91
92 MIRBuilder.buildCopy(PhysReg, ExtReg);
94 }
95};
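// AMDGPUIncomingArgHandler is the shared base for values flowing into the
// current function (formal arguments and call return values). getStackAddress
// creates a fixed frame object for stack-passed values, and assignValueToReg
// copies out of the physical register, going through a full 32-bit copy plus
// truncate for values narrower than 32 bits. markPhysRegUsed is left to the
// concrete handlers below.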
96
99
101 : IncomingValueHandler(B, MRI) {}
102
106 auto &MFI = MIRBuilder.getMF().getFrameInfo();
107
108
109
110 const bool IsImmutable = !Flags.isByVal();
111 int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
113 auto AddrReg = MIRBuilder.buildFrameIndex(
114 LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
115 StackUsed = std::max(StackUsed, Size + Offset);
116 return AddrReg.getReg(0);
117 }
118
121 markPhysRegUsed(PhysReg);
122
124
125
126 auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
127
128
129
130 auto Extended =
131 buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
132 MIRBuilder.buildTrunc(ValVReg, Extended);
133 return;
134 }
135
137 }
138
143
147 MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
148 }
149
150
151
152
153 virtual void markPhysRegUsed(unsigned PhysReg) = 0;
154};
155
158 : AMDGPUIncomingArgHandler(B, MRI) {}
159
160 void markPhysRegUsed(unsigned PhysReg) override {
161 MIRBuilder.getMBB().addLiveIn(PhysReg);
162 }
163};
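// CallReturnHandler reads a call's return values. Unlike the formal-argument
// handler above, which adds each physical register as a live-in of the entry
// block, it records the used register on the call instruction held in MIB.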
164
165struct CallReturnHandler : public AMDGPUIncomingArgHandler {
168 : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
169
170 void markPhysRegUsed(unsigned PhysReg) override {
172 }
173
175};
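// AMDGPUOutgoingArgHandler places outgoing call arguments. For tail calls,
// stack arguments are addressed through fixed frame objects offset by FPDiff;
// otherwise the address is formed from the stack pointer (copied directly when
// flat scratch is enabled, or produced with G_AMDGPU_WAVE_ADDRESS when it is
// not) plus a G_PTR_ADD of the argument's byte offset.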
176
177struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
178
179
180 int FPDiff;
181
182
184
185 bool IsTailCall;
186
189 bool IsTailCall = false, int FPDiff = 0)
190 : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
191 IsTailCall(IsTailCall) {}
192
199
200 if (IsTailCall) {
205 return FIReg.getReg(0);
206 }
207
209
210 if (!SPReg) {
212 if (ST.enableFlatScratch()) {
213
214 SPReg = MIRBuilder.buildCopy(PtrTy,
215 MFI->getStackPtrOffsetReg()).getReg(0);
216 } else {
217
218
219
220 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
221 {MFI->getStackPtrOffsetReg()});
222 }
223 }
224
226
227 auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
229 return AddrReg.getReg(0);
230 }
231
238
243 }
244
250 ? extendRegister(Arg.Regs[ValRegIndex], VA)
251 : Arg.Regs[ValRegIndex];
252 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
253 }
254};
255}
256
259}
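// extOpcodeToISDExtOpcode is a small helper that maps GlobalISel's
// G_SEXT/G_ZEXT/G_ANYEXT opcodes to the matching ISD extension kinds, used
// when deciding how small integer return values should be extended.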
260
261
263 switch (MIOpc) {
264 case TargetOpcode::G_SEXT:
266 case TargetOpcode::G_ZEXT:
268 case TargetOpcode::G_ANYEXT:
270 default:
272 }
273}
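// canLowerReturn runs the split return types through the return-value
// CCAssignFn to check whether they can all be assigned to return registers.
// When this returns false, the generic framework demotes the return to a
// hidden sret-style pointer argument (see the FLI.CanLowerReturn checks
// further down).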
274
275bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
278 bool IsVarArg) const {
279
281 return true;
282
285 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
287
289}
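// lowerReturnVal splits the IR return value into legal pieces, applies the
// extension implied by signext/zeroext return attributes on small integers,
// and hands the pieces to AMDGPUOutgoingValueHandler so they end up in the
// ABI return registers attached to the return instruction being built (Ret).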
290
291
292
296 if (!Val)
297 return true;
298
299 auto &MF = B.getMF();
304
307
310 assert(VRegs.size() == SplitEVTs.size() &&
311 "For each split Type there should be exactly one VReg.");
312
314
315 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
316 EVT VT = SplitEVTs[i];
320
322 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
323 if (RetInfo.Flags[0].isSExt()) {
324 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
325 ExtendOp = TargetOpcode::G_SEXT;
326 } else if (RetInfo.Flags[0].isZExt()) {
327 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
328 ExtendOp = TargetOpcode::G_ZEXT;
329 }
330
333 if (ExtVT != VT) {
336 Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
337 }
338 }
339
340 if (Reg != RetInfo.Regs[0]) {
341 RetInfo.Regs[0] = Reg;
342
344 }
345
347 }
348
350
351 OutgoingValueAssigner Assigner(AssignFn);
352 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
355}
356
360
364
365 assert(!Val == VRegs.empty() && "Return value without a vreg");
366
369 const bool IsWaveEnd =
371 if (IsWaveEnd) {
372 B.buildInstr(AMDGPU::S_ENDPGM)
373 .addImm(0);
374 return true;
375 }
376
377 unsigned ReturnOpc =
378 IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
379 auto Ret = B.buildInstrNoInsert(ReturnOpc);
380
381 if (!FLI.CanLowerReturn)
382 insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
383 else if (!lowerReturnVal(B, Val, VRegs, Ret))
384 return false;
385
386
387
388 B.insertInstr(Ret);
389 return true;
390}
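// lowerParameterPtr materializes a pointer to a kernel argument by adding a
// byte offset to the kernarg segment pointer live-in.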
391
399 Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
400
402
403 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
404}
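// lowerParameter loads one kernel argument from the kernarg segment: the
// argument is split into legal pieces, a pointer is formed for each piece at
// its field offset via lowerParameterPtr, and each piece is loaded through
// that pointer.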
405
408 Align Alignment) const {
413
415
418 splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
419
420 unsigned Idx = 0;
421 for (ArgInfo &SplitArg : SplitArgs) {
422 Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
423 lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
424
425 LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
426 if (SplitArg.Flags[0].isPointer()) {
427
428 LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),
429 ArgTy.getScalarSizeInBits());
430 ArgTy = ArgTy.isVector() ? LLT::vector(ArgTy.getElementCount(), PtrTy)
431 : PtrTy;
432 }
433
435 PtrInfo,
439
440 assert(SplitArg.Regs.size() == 1);
441
442 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
444 }
445}
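// allocateHSAUserSGPRs reserves the user SGPRs the kernel was compiled to
// expect (private segment buffer, dispatch pointer, queue pointer, kernarg
// segment pointer, dispatch ID, flat scratch init), adding them as live-ins
// and allocating them in the CCState so later arguments are placed after them.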
446
447
453
456 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
457 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
458 CCInfo.AllocateReg(PrivateSegmentBufferReg);
459 }
460
463 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
465 }
466
469 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
471 }
472
477 Register VReg = MRI.createGenericVirtualRegister(P4);
478 MRI.addLiveIn(InputPtrReg, VReg);
479 B.getMBB().addLiveIn(InputPtrReg);
480 B.buildCopy(VReg, InputPtrReg);
482 }
483
486 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
488 }
489
491 Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
492 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
494 }
495
496
497
498}
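// lowerFormalArgumentsKernel lowers the explicit arguments of an amdgpu_kernel:
// offsets are computed from the kernarg base using each argument's ABI
// alignment, byref arguments receive a pointer into the kernarg segment
// (addrspacecast to the byref address space when needed), and everything else
// is loaded with lowerParameter. Arguments carrying the
// "amdgpu-hidden-argument" attribute are not supported here and cause a
// bail-out.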
499
510
512 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
513
515
516 unsigned i = 0;
517 const Align KernArgBaseAlign(16);
519 uint64_t ExplicitArgOffset = 0;
520
521
522 for (auto &Arg : F.args()) {
523
524 if (Arg.hasAttribute("amdgpu-hidden-argument")) {
525 LLVM_DEBUG(dbgs() << "Preloading hidden arguments is not supported\n");
526 return false;
527 }
528
529 const bool IsByRef = Arg.hasByRefAttr();
530 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
531 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
532 if (AllocSize == 0)
533 continue;
534
535 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;
536 Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
537
538 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
539 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
540
541 if (Arg.use_empty()) {
542 ++i;
543 continue;
544 }
545
547
548 if (IsByRef) {
549 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();
550
551 assert(VRegs[i].size() == 1 &&
552 "expected only one register for byval pointers");
554 lowerParameterPtr(VRegs[i][0], B, ArgOffset);
555 } else {
557 Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
558 lowerParameterPtr(PtrReg, B, ArgOffset);
559
560 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
561 }
562 } else {
563 ArgInfo OrigArg(VRegs[i], Arg, i);
566 lowerParameter(B, OrigArg, ArgOffset, Alignment);
567 }
568
569 ++i;
570 }
571
574 return true;
575}
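// lowerFormalArguments handles incoming arguments for non-kernel functions,
// including graphics shaders. For pixel shaders it tracks which PS inputs are
// allocated and enabled (forcing input 0 when none would otherwise be enabled,
// since at least one input must be enabled), replaces unused shader inputs
// with undef, and for ordinary functions also wires up the special input
// SGPRs/VGPRs before assigning the remaining arguments by calling convention.
// A hidden sret argument is inserted first when the return value had to be
// demoted.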
576
581
582
583
584
587
590
598
600 CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
602
604 Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
605 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
606 CCInfo.AllocateReg(ImplicitBufferPtrReg);
607 }
608
609
611 Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
612 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
614 }
615
617 unsigned Idx = 0;
618 unsigned PSInputNum = 0;
619
620
621
622 if (!FLI.CanLowerReturn)
624
625 for (auto &Arg : F.args()) {
626 if (DL.getTypeStoreSize(Arg.getType()) == 0)
627 continue;
628
629 const bool InReg = Arg.hasAttribute(Attribute::InReg);
630
631 if (Arg.hasAttribute(Attribute::SwiftSelf) ||
632 Arg.hasAttribute(Attribute::SwiftError) ||
633 Arg.hasAttribute(Attribute::Nest))
634 return false;
635
637 const bool ArgUsed = !Arg.use_empty();
638 bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
639
640 if (!SkipArg) {
641 Info->markPSInputAllocated(PSInputNum);
642 if (ArgUsed)
643 Info->markPSInputEnabled(PSInputNum);
644 }
645
646 ++PSInputNum;
647
648 if (SkipArg) {
649 for (Register R : VRegs[Idx])
650 B.buildUndef(R);
651
652 ++Idx;
653 continue;
654 }
655 }
656
660
663 }
664
665
666
667
668
669
670
671
672
673
674
675
676
677
679 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
680 ((Info->getPSInputAddr() & 0xF) == 0 &&
681 Info->isPSInputAllocated(11))) {
684 Info->markPSInputAllocated(0);
685 Info->markPSInputEnabled(0);
686 }
687
688 if (Subtarget.isAmdPalOS()) {
689
690
691
692
693
694
695
696
697 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
698 if ((PsInputBits & 0x7F) == 0 ||
699 ((PsInputBits & 0xF) == 0 &&
700 (PsInputBits >> 11 & 1)))
701 Info->markPSInputEnabled(countr_zero(Info->getPSInputAddr()));
702 }
703 }
704
707
710
711 if (!IsEntryFunc && !IsGraphics) {
712
714
715 if (!Subtarget.enableFlatScratch())
718 }
719
722 return false;
723
726 return false;
727
729
730
731 if (IsEntryFunc)
733
734
735
736
737
738 Info->setBytesInStackArgArea(StackSize);
739
740
742
743 return true;
744}
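// passSpecialInputs forwards the implicit ABI inputs a callee may rely on
// (dispatch ptr, queue ptr, implicitarg ptr, dispatch id, workgroup IDs, LDS
// kernel id) from the caller to the call, skipping any input the call site
// marks absent through the corresponding "amdgpu-no-*" attribute, and ORs the
// workitem IDs X/Y/Z together into the single packed VGPR argument the callee
// expects.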
745
748 SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
751
752
753
754 if (!Info.CB)
755 return true;
756
759
762
763
764
765
766
776 };
777
778 static constexpr StringLiteral ImplicitAttrNames[] = {
779 "amdgpu-no-dispatch-ptr",
780 "amdgpu-no-queue-ptr",
781 "amdgpu-no-implicitarg-ptr",
782 "amdgpu-no-dispatch-id",
783 "amdgpu-no-workgroup-id-x",
784 "amdgpu-no-workgroup-id-y",
785 "amdgpu-no-workgroup-id-z",
786 "amdgpu-no-lds-kernel-id",
787 };
788
790
794
795 unsigned I = 0;
796 for (auto InputID : InputRegs) {
799 LLT ArgTy;
800
801
802 if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
803 continue;
804
805 std::tie(OutgoingArg, ArgRC, ArgTy) =
806 CalleeArgInfo->getPreloadedValue(InputID);
807 if (!OutgoingArg)
808 continue;
809
812 std::tie(IncomingArg, IncomingArgRC, ArgTy) =
813 CallerArgInfo.getPreloadedValue(InputID);
814 assert(IncomingArgRC == ArgRC);
815
816 Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
817
818 if (IncomingArg) {
819 LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
821 LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
823 std::optional<uint32_t> Id =
825 if (Id) {
827 } else {
829 }
830 } else {
831
832
834 }
835
837 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
840 } else {
841 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
842 return false;
843 }
844 }
845
846
847
850 LLT ArgTy;
851
852 std::tie(OutgoingArg, ArgRC, ArgTy) =
853 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
854 if (!OutgoingArg)
855 std::tie(OutgoingArg, ArgRC, ArgTy) =
856 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
857 if (!OutgoingArg)
858 std::tie(OutgoingArg, ArgRC, ArgTy) =
859 CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
860 if (!OutgoingArg)
861 return false;
862
863 auto WorkitemIDX =
864 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
865 auto WorkitemIDY =
866 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
867 auto WorkitemIDZ =
868 CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
869
870 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
871 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
872 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
874
875 const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
876 const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
877 const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
878
879
880
882 if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
883 NeedWorkItemIDX) {
884 if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
885 InputReg = MRI.createGenericVirtualRegister(S32);
886 LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
887 std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
888 } else {
889 InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
890 }
891 }
892
893 if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
894 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
895 Register Y = MRI.createGenericVirtualRegister(S32);
896 LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
897 std::get<2>(WorkitemIDY));
898
900 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
901 }
902
903 if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
904 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
905 Register Z = MRI.createGenericVirtualRegister(S32);
906 LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
907 std::get<2>(WorkitemIDZ));
908
910 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
911 }
912
913 if (!InputReg &&
914 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
915 InputReg = MRI.createGenericVirtualRegister(S32);
916 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
917
918
919
920
921 MIRBuilder.buildUndef(InputReg);
922 } else {
923
924
925 ArgDescriptor IncomingArg = ArgDescriptor::createArg(
926 IncomingArgX ? *IncomingArgX :
927 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
928 LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
929 &AMDGPU::VGPR_32RegClass, S32);
930 }
931 }
932
934 if (InputReg)
935 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
936
939 } else {
940 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
941 return false;
942 }
943
944 return true;
945}
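// getAssignFnsForCC returns the fixed and vararg argument-assignment functions
// for a calling convention; getCallOpcode picks the call pseudo: G_SI_CALL for
// normal calls, the SI_CS_CHAIN_TC pseudos for chain tail calls, and an
// SI_TCRETURN variant for other tail calls.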
946
947
948
949static std::pair<CCAssignFn *, CCAssignFn *>
952}
953
955 bool IsTailCall, bool isWave32,
957 assert((!IsIndirect || !IsTailCall) &&
959 "Indirect calls can't be tail calls, "
960 "because the address can be divergent");
961 if (!IsTailCall)
962 return AMDGPU::G_SI_CALL;
963
965 return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
966
968 AMDGPU::SI_TCRETURN;
969}
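// addCallTargetOperands adds the callee operand to the call instruction,
// either as a register (indirect calls) or as a global address with zero
// offset; any other callee form cannot be lowered.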
970
971
975 if (Info.Callee.isReg()) {
978 } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
979
980
986 } else
987 return false;
988
989 return true;
990}
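// doCallerAndCalleePassArgsTheSameWay returns true when the callee uses the
// caller's calling convention, or when the callee-preserved register mask is
// compatible and both conventions would assign the incoming arguments to the
// same locations (checked through resultsCompatible).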
991
998
999
1000 if (CalleeCC == CallerCC)
1001 return true;
1002
1004
1005
1006 const auto *TRI = ST.getRegisterInfo();
1007
1008 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1009 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
1010 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
1011 return false;
1012
1013
1017 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
1018 getAssignFnsForCC(CalleeCC, TLI);
1019
1022 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
1023 getAssignFnsForCC(CallerCC, TLI);
1024
1025
1026
1027 IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
1028 CalleeAssignFnVarArg);
1029 IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
1030 CallerAssignFnVarArg);
1031 return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);
1032}
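// areCalleeOutgoingArgsTailCallable: when the callee takes stack arguments,
// verify that they fit inside the caller's existing stack-argument area and
// that arguments living in callee-saved registers are forwarded unchanged
// (parametersInCSRMatch), so a tail call cannot clobber state the caller's
// caller still expects.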
1033
1037
1038 if (OutArgs.empty())
1039 return true;
1040
1045
1048 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1049
1050
1052 CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
1054
1056 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
1057 return false;
1058 }
1059
1060
1063 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
1064 return false;
1065 }
1066
1067
1070 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
1073}
1074
1075
1078}
1079
1080
1082 switch (CC) {
1085 return true;
1086 default:
1088 }
1089}
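// isEligibleForTailCallOptimization: the call must be marked as a tail call,
// must not use an indirect (register) callee, must use a calling convention
// that can be tail called from a caller without byval/swifterror arguments,
// must pass arguments the same way as the caller, and its outgoing arguments
// must fit the caller's stack area.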
1090
1094
1095 if (!Info.IsTailCall)
1096 return false;
1097
1098
1099
1100 if (Info.Callee.isReg())
1101 return false;
1102
1107
1109 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
1110
1111
1112 if (!CallerPreserved)
1113 return false;
1114
1116 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1117 return false;
1118 }
1119
1121 return A.hasByValAttr() || A.hasSwiftErrorAttr();
1122 })) {
1123 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "
1124 "or swifterror arguments\n");
1125 return false;
1126 }
1127
1128
1131
1132
1133
1137 << "... Caller and callee have incompatible calling conventions.\n");
1138 return false;
1139 }
1140
1141
1142
1143
1145 return false;
1146
1147 LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");
1148 return true;
1149}
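// handleImplicitCallArguments: when flat scratch is disabled, the caller's
// scratch resource descriptor is copied into the SGPR quad the callee expects
// (SGPR0-3, or SGPR48-51 for chain calls); each (physical register, vreg)
// pair collected for the implicit inputs is then copied into place and
// recorded on the call instruction.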
1150
1151
1152
1153
1158 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {
1159 if (!ST.enableFlatScratch()) {
1160
1161
1164
1166 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
1167 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
1168
1169 MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);
1171 }
1172
1173 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
1176 }
1177}
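// lowerTailCall emits a tail call, including the amdgpu_cs_chain variants. It
// builds the tail-call pseudo, adds the EXEC mask operand for chain calls,
// places outgoing arguments with FPDiff compensating for a differently sized
// stack-argument area, forwards the implicit inputs, and constrains the
// callee operand's register class when the call is indirect.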
1178
1188
1189
1191
1192
1196 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1197
1199 if (!IsSibCall)
1200 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
1201
1202 unsigned Opc =
1203 getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
1206 return false;
1207
1208
1209
1211
1212
1216 assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");
1217
1218 if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))
1219 return false;
1220
1221 if (const auto *CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {
1222 MIB.addImm(CI->getSExtValue());
1223 } else {
1224 MIB.addReg(ExecArg.Regs[0]);
1225 unsigned Idx = MIB->getNumOperands() - 1;
1227 MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1228 MIB->getDesc(), MIB->getOperand(Idx), Idx));
1229 }
1230 }
1231
1232
1233 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1234 MIB.addRegMask(Mask);
1235
1236
1237
1238
1239
1240
1241 int FPDiff = 0;
1242
1243
1244
1245
1246 unsigned NumBytes = 0;
1247 if (!IsSibCall) {
1248
1249
1250
1251 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1253 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1254
1255
1258 return false;
1259
1260
1261
1263
1264
1265
1266
1267 FPDiff = NumReusableBytes - NumBytes;
1268
1269
1270
1271
1272
1273
1275 "unaligned stack on tail call");
1276 }
1277
1279 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1280
1281
1282
1283
1285
1288
1290 return false;
1291 }
1292
1294
1296 return false;
1297
1298
1299 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);
1300 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1301 return false;
1302
1303 if (Info.ConvergenceCtrlToken) {
1305 }
1307 ImplicitArgRegs);
1308
1309
1310
1311 if (!IsSibCall) {
1312 MIB->getOperand(1).setImm(FPDiff);
1314
1315
1316
1317
1319 }
1320
1321
1323
1324
1325
1326
1327
1328
1329
1330 if (MIB->getOperand(0).isReg()) {
1332 MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,
1333 MIB->getDesc(), MIB->getOperand(0), 0));
1334 }
1335
1337 Info.LoweredTailCall = true;
1338 return true;
1339}
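// lowerChainCall lowers a call to llvm.amdgcn.cs.chain: the first argument
// becomes the callee, the flags argument must be zero, SGPR arguments must be
// marked inreg while VGPR arguments must not be, and the resulting call is
// forced to be a tail call.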
1340
1341
1348
1349 assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&
1350 "Non-zero flags aren't supported yet.");
1351 assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");
1352
1356
1357
1358
1359 const Value *CalleeV = Callee.OrigValue->stripPointerCasts();
1360 if (const Function *F = dyn_cast<Function>(CalleeV)) {
1362 Info.CallConv = F->getCallingConv();
1363 } else {
1364 assert(Callee.Regs.size() == 1 && "Too many regs for the callee");
1367
1368 }
1369
1370
1371 Info.IsVarArg = false;
1372
1375 "SGPR arguments should be marked inreg");
1378 "VGPR arguments should not be marked inreg");
1379
1383
1384 Info.IsMustTailCall = true;
1386}
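// lowerCall is the main entry point for call lowering: it dispatches
// llvm.amdgcn.cs.chain calls to lowerChainCall, rejects varargs, splits the
// outgoing (and, if lowerable, return) values, tries tail-call lowering, and
// otherwise emits ADJCALLSTACKUP, a G_SI_CALL with the preserved-register
// mask, the argument and return-value assignments, and ADJCALLSTACKDOWN.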
1387
1391 if (F->isIntrinsic()) {
1392 assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
1393 "Unexpected intrinsic");
1395 }
1396
1397 if (Info.IsVarArg) {
1398 LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
1399 return false;
1400 }
1401
1405
1410
1412 for (auto &OrigArg : Info.OrigArgs)
1414
1416 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
1418
1419
1420 bool CanTailCallOpt =
1421 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1422
1423
1424 if (Info.IsMustTailCall && !CanTailCallOpt) {
1425 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1426 return false;
1427 }
1428
1429 Info.IsTailCall = CanTailCallOpt;
1430 if (CanTailCallOpt)
1431 return lowerTailCall(MIRBuilder, Info, OutArgs);
1432
1433
1436 std::tie(AssignFnFixed, AssignFnVarArg) =
1437 getAssignFnsForCC(Info.CallConv, TLI);
1438
1439 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
1442
1443
1444
1445 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),
1446 Info.CallConv);
1447
1449 MIB.addDef(TRI->getReturnAddressReg(MF));
1450
1451 if (!Info.IsConvergent)
1453
1455 return false;
1456
1457
1458 const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1459 MIB.addRegMask(Mask);
1460
1462 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());
1463
1464
1465
1466
1468
1470
1472 return false;
1473 }
1474
1475
1477
1480 return false;
1481
1482 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
1483 if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
1484 return false;
1485
1487
1488 if (Info.ConvergenceCtrlToken) {
1490 }
1492 ImplicitArgRegs);
1493
1494
1496
1497
1498
1499
1500
1501
1502
1503 if (MIB->getOperand(1).isReg()) {
1505 MF, *TRI, MRI, *ST.getInstrInfo(),
1506 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
1507 1));
1508 }
1509
1510
1512
1513
1514
1515
1516 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1518 Info.IsVarArg);
1520 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1522 Info.CallConv, Info.IsVarArg))
1523 return false;
1524 }
1525
1526 uint64_t CalleePopBytes = NumBytes;
1527
1528 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
1530 .addImm(CalleePopBytes);
1531
1532 if (!Info.CanLowerReturn) {
1533 insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
1534 Info.DemoteRegister, Info.DemoteStackIndex);
1535 }
1536
1537 return true;
1538}