LLVM: lib/Target/AMDGPU/AMDGPUCallLowering.cpp Source File

1

2

3

4

5

6

7

8

9

10

11

12

13

14

24#include "llvm/IR/IntrinsicsAMDGPU.h"

25

26#define DEBUG_TYPE "amdgpu-call-lowering"

27

28using namespace llvm;

29

30namespace {

31

32

36

37

39 }

40

42}

43

47 : OutgoingValueHandler(B, MRI), MIB(MIB) {}

48

50

55 }

56

61 }

62

65 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

66

67

68

69

71 = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

72 if (TRI->isSGPRReg(MRI, PhysReg)) {

73 LLT Ty = MRI.getType(ExtReg);

75 if (Ty != S32) {

76

77

80 ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);

81 else

82 ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);

83 }

84

85 auto ToSGPR = MIRBuilder

86 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,

87 {MRI.getType(ExtReg)})

88 .addReg(ExtReg);

89 ExtReg = ToSGPR.getReg(0);

90 }

91

92 MIRBuilder.buildCopy(PhysReg, ExtReg);

94 }

95};
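The handler above keeps outgoing return values destined for SGPRs wave-uniform: non-s32 pieces are first bitcast (or ptrtoint'ed) to s32 and then routed through amdgcn_readfirstlane before the final copy. As a minimal standalone model of what readfirstlane means, here is a hypothetical helper over per-lane values (illustration only, not the LLVM intrinsic):

// Hypothetical standalone model of readfirstlane semantics: the result is the
// value held by the lowest-numbered active lane, broadcast to a scalar.
#include <cstdint>
#include <vector>

uint32_t readFirstLaneModel(const std::vector<uint32_t> &LaneValues,
                            uint64_t ExecMask) {
  for (size_t Lane = 0; Lane < LaneValues.size() && Lane < 64; ++Lane)
    if (ExecMask & (1ull << Lane))
      return LaneValues[Lane]; // value that lands in the SGPR result
  return 0; // no active lanes: the hardware result is effectively undefined
}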

96

99

101 : IncomingValueHandler(B, MRI) {}

102

106 auto &MFI = MIRBuilder.getMF().getFrameInfo();

107

108

109

110 const bool IsImmutable = !Flags.isByVal();

111 int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);

113 auto AddrReg = MIRBuilder.buildFrameIndex(

115 StackUsed = std::max(StackUsed, Size + Offset);

116 return AddrReg.getReg(0);

117 }

118

121 markPhysRegUsed(PhysReg);

122

124

125

126 auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

127

128

129

130 auto Extended =

132 MIRBuilder.buildTrunc(ValVReg, Extended);

133 return;

134 }

135

137 }

138

143

147 MIRBuilder.buildLoad(ValVReg, Addr, *MMO);

148 }

149

150

151

152

153 virtual void markPhysRegUsed(unsigned PhysReg) = 0;

154};
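getStackAddress above creates one fixed frame object per stack-passed incoming value and tracks the high-water mark Size + Offset in StackUsed. A small standalone sketch of that bookkeeping, using a hypothetical StackArgSlot record in place of the CCValAssign machinery:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for one stack-passed argument's assigned slot.
struct StackArgSlot {
  uint64_t Size;  // memory size of the piece in bytes
  int64_t Offset; // byte offset from the start of the incoming argument area
};

uint64_t computeStackUsed(const std::vector<StackArgSlot> &Slots) {
  uint64_t StackUsed = 0;
  for (const StackArgSlot &S : Slots)
    StackUsed = std::max<uint64_t>(StackUsed, S.Size + S.Offset);
  return StackUsed;
}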

155

158 : AMDGPUIncomingArgHandler(B, MRI) {}

159

160 void markPhysRegUsed(unsigned PhysReg) override {

161 MIRBuilder.getMBB().addLiveIn(PhysReg);

162 }

163};

164

165struct CallReturnHandler : public AMDGPUIncomingArgHandler {

168 : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

169

170 void markPhysRegUsed(unsigned PhysReg) override {

172 }

173

175};

176

177struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {

178

179

180 int FPDiff;

181

182

184

185 bool IsTailCall;

186

189 bool IsTailCall = false, int FPDiff = 0)

190 : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),

191 IsTailCall(IsTailCall) {}

192

199

200 if (IsTailCall) {

205 return FIReg.getReg(0);

206 }

207

209

210 if (!SPReg) {

212 if (ST.enableFlatScratch()) {

213

214 SPReg = MIRBuilder.buildCopy(PtrTy,

216 } else {

217

218

219

220 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},

222 }

223 }

224

226

227 auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);

229 return AddrReg.getReg(0);

230 }

231

238

243 }

244

250 ? extendRegister(Arg.Regs[ValRegIndex], VA)

251 : Arg.Regs[ValRegIndex];

252 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);

253 }

254};

255}

256

259}

260

261

263 switch (MIOpc) {

264 case TargetOpcode::G_SEXT:

266 case TargetOpcode::G_ZEXT:

268 case TargetOpcode::G_ANYEXT:

270 default:

272 }

273}
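extOpcodeToISDExtOpcode translates the GlobalISel extend opcodes into the ISD node kinds that getTypeForExtReturn expects; any other opcode is unreachable. A stand-in-enum sketch of the same mapping (the enum names here are illustrative, not the LLVM ones):

#include <stdexcept>

// Illustrative stand-ins for TargetOpcode::G_* and ISD::* values.
enum class GExtOpc { SExt, ZExt, AnyExt };
enum class ISDExtend { SignExtend, ZeroExtend, AnyExtend };

ISDExtend extOpcodeToISDExtOpcodeSketch(GExtOpc Opc) {
  switch (Opc) {
  case GExtOpc::SExt:   return ISDExtend::SignExtend;
  case GExtOpc::ZExt:   return ISDExtend::ZeroExtend;
  case GExtOpc::AnyExt: return ISDExtend::AnyExtend;
  }
  throw std::logic_error("not an extension opcode"); // mirrors llvm_unreachable
}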

274

275bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,

278 bool IsVarArg) const {

279

281 return true;

282

285 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,

287

289}

290

291

292

296 if (!Val)

297 return true;

298

299 auto &MF = B.getMF();

304

307

311 "For each split Type there should be exactly one VReg.");

312

314

315 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {

316 EVT VT = SplitEVTs[i];

320

322 unsigned ExtendOp = TargetOpcode::G_ANYEXT;

323 if (RetInfo.Flags[0].isSExt()) {

324 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");

325 ExtendOp = TargetOpcode::G_SEXT;

326 } else if (RetInfo.Flags[0].isZExt()) {

327 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");

328 ExtendOp = TargetOpcode::G_ZEXT;

329 }

330

333 if (ExtVT != VT) {

336 Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);

337 }

338 }

339

340 if (Reg != RetInfo.Regs[0]) {

341 RetInfo.Regs[0] = Reg;

342

344 }

345

347 }

348

350

351 OutgoingValueAssigner Assigner(AssignFn);

352 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);

354 CC, F.isVarArg());

355}

356

360

364

365 assert(!Val == VRegs.empty() && "Return value without a vreg");

366

369 const bool IsWaveEnd =

371 if (IsWaveEnd) {

372 B.buildInstr(AMDGPU::S_ENDPGM)

373 .addImm(0);

374 return true;

375 }

376

377 unsigned ReturnOpc =

378 IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;

379 auto Ret = B.buildInstrNoInsert(ReturnOpc);

380

381 if (!FLI.CanLowerReturn)

383 else if (!lowerReturnVal(B, Val, VRegs, Ret))

384 return false;

385

386

387

388 B.insertInstr(Ret);

389 return true;

390}

391

399 Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

400

402

403 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);

404}

405

408 Align Alignment) const {

413

415

418 splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);

419

420 unsigned Idx = 0;

421 for (ArgInfo &SplitArg : SplitArgs) {

422 Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);

423 lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

424

426 if (SplitArg.Flags[0].isPointer()) {

427

428 LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),

431 : PtrTy;

432 }

433

435 PtrInfo,

439

440 assert(SplitArg.Regs.size() == 1);

441

442 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);

444 }

445}

446

447

453

456 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);

457 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);

458 CCInfo.AllocateReg(PrivateSegmentBufferReg);

459 }

460

463 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);

465 }

466

469 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);

471 }

472

477 Register VReg = MRI.createGenericVirtualRegister(P4);

478 MRI.addLiveIn(InputPtrReg, VReg);

479 B.getMBB().addLiveIn(InputPtrReg);

480 B.buildCopy(VReg, InputPtrReg);

482 }

483

486 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);

488 }

489

491 Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);

492 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);

494 }

495

496

497

498}

499

510

512 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

513

515

516 unsigned i = 0;

517 const Align KernArgBaseAlign(16);

519 uint64_t ExplicitArgOffset = 0;

520

521

522 for (auto &Arg : F.args()) {

523

524 if (Arg.hasAttribute("amdgpu-hidden-argument")) {

525 LLVM_DEBUG(dbgs() << "Preloading hidden arguments is not supported\n");

526 return false;

527 }

528

529 const bool IsByRef = Arg.hasByRefAttr();

530 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();

531 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

532 if (AllocSize == 0)

533 continue;

534

535 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;

536 Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

537

538 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;

539 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

540

541 if (Arg.use_empty()) {

542 ++i;

543 continue;

544 }

545

547

548 if (IsByRef) {

549 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

550

552 "expected only one register for byval pointers");

554 lowerParameterPtr(VRegs[i][0], B, ArgOffset);

555 } else {

557 Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);

558 lowerParameterPtr(PtrReg, B, ArgOffset);

559

560 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);

561 }

562 } else {

563 ArgInfo OrigArg(VRegs[i], Arg, i);

566 lowerParameter(B, OrigArg, ArgOffset, Alignment);

567 }

568

569 ++i;

570 }

571

574 return true;

575}
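The loop above lays out explicit kernel arguments by rounding the running ExplicitArgOffset up to each argument's ABI alignment, adding the target's explicit kernarg base offset to form ArgOffset, and then advancing by the type's allocation size. A runnable sketch of just that arithmetic; the argument sizes, alignments, and zero base offset are illustrative:

// Standalone sketch of the explicit kernarg offset computation:
//   ArgOffset         = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset
//   ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize
#include <cstdint>
#include <cstdio>
#include <vector>

static uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  struct Arg { uint64_t AllocSize, ABIAlign; };
  // Illustrative kernel signature: (i32, double, <4 x i32>).
  std::vector<Arg> Args = {{4, 4}, {8, 8}, {16, 16}};
  uint64_t BaseOffset = 0;       // explicit kernarg base offset, assumed 0 here
  uint64_t ExplicitArgOffset = 0;
  for (const Arg &A : Args) {
    uint64_t ArgOffset = alignToSketch(ExplicitArgOffset, A.ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignToSketch(ExplicitArgOffset, A.ABIAlign) + A.AllocSize;
    std::printf("arg at offset %llu\n", (unsigned long long)ArgOffset);
  }
  return 0;
}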

576

581

582

583

584

587

590

598

600 CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

602

604 Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);

605 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);

606 CCInfo.AllocateReg(ImplicitBufferPtrReg);

607 }

608

609

611 Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);

612 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);

614 }

615

617 unsigned Idx = 0;

618 unsigned PSInputNum = 0;

619

620

621

622 if (!FLI.CanLowerReturn)

624

625 for (auto &Arg : F.args()) {

626 if (DL.getTypeStoreSize(Arg.getType()) == 0)

627 continue;

628

629 const bool InReg = Arg.hasAttribute(Attribute::InReg);

630

631 if (Arg.hasAttribute(Attribute::SwiftSelf) ||

632 Arg.hasAttribute(Attribute::SwiftError) ||

633 Arg.hasAttribute(Attribute::Nest))

634 return false;

635

637 const bool ArgUsed = !Arg.use_empty();

638 bool SkipArg = !ArgUsed && Info->isPSInputAllocated(PSInputNum);

639

640 if (!SkipArg) {

641 Info->markPSInputAllocated(PSInputNum);

642 if (ArgUsed)

643 Info->markPSInputEnabled(PSInputNum);

644 }

645

646 ++PSInputNum;

647

648 if (SkipArg) {

650 B.buildUndef(R);

651

653 continue;

654 }

655 }

656

660

663 }

664

665

666

667

668

669

670

671

672

673

674

675

676

677

679 if ((Info->getPSInputAddr() & 0x7F) == 0 ||

680 ((Info->getPSInputAddr() & 0xF) == 0 &&

681 Info->isPSInputAllocated(11))) {

684 Info->markPSInputAllocated(0);

685 Info->markPSInputEnabled(0);

686 }

687

688 if (Subtarget.isAmdPalOS()) {

689

690

691

692

693

694

695

696

697 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();

698 if ((PsInputBits & 0x7F) == 0 ||

699 ((PsInputBits & 0xF) == 0 &&

700 (PsInputBits >> 11 & 1)))

702 }

703 }
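Both branches above make sure a pixel shader enables at least one SPI input: if none of the low seven bits are set, or the low four are clear while input 11 is in use, a fallback input (input 0 in the general case) is force-enabled so the GPU does not hang waiting on interpolation data. A standalone sketch of that predicate; the names are descriptive only, not LLVM API:

#include <cstdint>

// True when no usable PS input is enabled and a fallback input must be forced on.
bool needsForcedPSInput(uint32_t PSInputBits, bool Input11Allocated) {
  return (PSInputBits & 0x7F) == 0 ||
         ((PSInputBits & 0xF) == 0 && Input11Allocated);
}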

704

707

710

711 if (!IsEntryFunc && !IsGraphics) {

712

714

715 if (!Subtarget.enableFlatScratch())

718 }

719

722 return false;

723

726 return false;

727

729

730

731 if (IsEntryFunc)

733

734

735

736

737

738 Info->setBytesInStackArgArea(StackSize);

739

740

741 B.setMBB(MBB);

742

743 return true;

744}

745

748 SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,

751

752

753

754 if (Info.CB)

755 return true;

756

759

762

763

764

765

766

776 };

777

778 static constexpr StringLiteral ImplicitAttrNames[] = {

779 "amdgpu-no-dispatch-ptr",

780 "amdgpu-no-queue-ptr",

781 "amdgpu-no-implicitarg-ptr",

782 "amdgpu-no-dispatch-id",

783 "amdgpu-no-workgroup-id-x",

784 "amdgpu-no-workgroup-id-y",

785 "amdgpu-no-workgroup-id-z",

786 "amdgpu-no-lds-kernel-id",

787 };

788

790

794

795 unsigned I = 0;

796 for (auto InputID : InputRegs) {

799 LLT ArgTy;

800

801

802 if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))

803 continue;

804

805 std::tie(OutgoingArg, ArgRC, ArgTy) =

807 if (!OutgoingArg)

808 continue;

809

812 std::tie(IncomingArg, IncomingArgRC, ArgTy) =

813 CallerArgInfo.getPreloadedValue(InputID);

814 assert(IncomingArgRC == ArgRC);

815

816 Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

817

818 if (IncomingArg) {

819 LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);

821 LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);

823 std::optional<uint32_t> Id =

825 if (Id) {

827 } else {

829 }

830 } else {

831

832

834 }

835

837 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

840 } else {

841 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");

842 return false;

843 }

844 }

845

846

847

850 LLT ArgTy;

851

852 std::tie(OutgoingArg, ArgRC, ArgTy) =

854 if (!OutgoingArg)

855 std::tie(OutgoingArg, ArgRC, ArgTy) =

857 if (!OutgoingArg)

858 std::tie(OutgoingArg, ArgRC, ArgTy) =

860 if (!OutgoingArg)

861 return false;

862

863 auto WorkitemIDX =

865 auto WorkitemIDY =

867 auto WorkitemIDZ =

869

870 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);

871 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);

872 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);

874

875 const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");

876 const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");

877 const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

878

879

880

882 if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&

883 NeedWorkItemIDX) {

884 if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {

885 InputReg = MRI.createGenericVirtualRegister(S32);

886 LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,

887 std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));

888 } else {

890 }

891 }

892

893 if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&

894 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {

896 LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),

897 std::get<2>(WorkitemIDY));

898

900 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;

901 }

902

903 if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&

904 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {

906 LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),

907 std::get<2>(WorkitemIDZ));

908

910 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;

911 }

912

913 if (!InputReg &&

914 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {

915 InputReg = MRI.createGenericVirtualRegister(S32);

916 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {

917

918

919

920

922 } else {

923

924

926 IncomingArgX ? *IncomingArgX :

927 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);

928 LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,

929 &AMDGPU::VGPR_32RegClass, S32);

930 }

931 }

932

934 if (InputReg)

935 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

936

939 } else {

940 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");

941 return false;

942 }

943

944 return true;

945}
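When the callee still consumes workitem IDs, the code above ORs the Y and Z components into the same 32-bit value as X before adding it as a VGPR argument; in the packed-ID layout each component occupies a 10-bit field. A runnable sketch of that packing, with the field positions assumed from the usual packed layout rather than quoted from this file:

// Standalone sketch: pack workitem IDs X, Y, Z into one 32-bit value with
// X in bits [9:0], Y in bits [19:10], Z in bits [29:20] (assumed layout).
#include <cassert>
#include <cstdint>

uint32_t packWorkItemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  assert(X < 1024 && Y < 1024 && Z < 1024 && "IDs must fit in 10 bits");
  return X | (Y << 10) | (Z << 20);
}

uint32_t unpackWorkItemIDX(uint32_t Packed) { return Packed & 0x3FF; }
uint32_t unpackWorkItemIDY(uint32_t Packed) { return (Packed >> 10) & 0x3FF; }
uint32_t unpackWorkItemIDZ(uint32_t Packed) { return (Packed >> 20) & 0x3FF; }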

946

947

948

949static std::pair<CCAssignFn *, CCAssignFn *>

952}

953

955 bool IsTailCall, bool isWave32,

957

959 "Indirect calls can't be tail calls, "

960 "because the address can be divergent");

961 if (!IsTailCall)

962 return AMDGPU::G_SI_CALL;

963

965 return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;

966

968 AMDGPU::SI_TCRETURN;

969}
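getCallOpcode selects the call pseudo: ordinary calls use G_SI_CALL, tail calls from an amdgpu_cs_chain calling convention use the wave-size-specific chain pseudo, and any other tail call uses SI_TCRETURN (indirect tail calls are rejected because the callee address could be divergent). A stand-in-enum sketch of that decision:

// Illustrative stand-ins for the AMDGPU call pseudo-instruction opcodes.
enum class CallPseudo { G_SI_CALL, SI_CS_CHAIN_TC_W32, SI_CS_CHAIN_TC_W64, SI_TCRETURN };

CallPseudo selectCallPseudoSketch(bool IsTailCall, bool IsChainCC, bool IsWave32) {
  if (!IsTailCall)
    return CallPseudo::G_SI_CALL;
  if (IsChainCC)
    return IsWave32 ? CallPseudo::SI_CS_CHAIN_TC_W32
                    : CallPseudo::SI_CS_CHAIN_TC_W64;
  return CallPseudo::SI_TCRETURN;
}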

970

971

975 if (Info.Callee.isReg()) {

978 } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {

979

980

986 } else

987 return false;

988

989 return true;

990}

991

998

999

1000 if (CalleeCC == CallerCC)

1001 return true;

1002

1004

1005

1006 const auto *TRI = ST.getRegisterInfo();

1007

1008 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

1009 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);

1010 if (TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))

1011 return false;

1012

1013

1017 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =

1019

1022 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =

1024

1025

1026

1028 CalleeAssignFnVarArg);

1030 CallerAssignFnVarArg);

1032}

1033

1037

1038 if (OutArgs.empty())

1039 return true;

1040

1045

1048 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

1049

1050

1052 CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());

1054

1056 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");

1057 return false;

1058 }

1059

1060

1063 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");

1064 return false;

1065 }

1066

1067

1070 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);

1073}

1074

1075

1078}

1079

1080

1082 switch (CC) {

1085 return true;

1086 default:

1088 }

1089}

1090

1094

1095 if (Info.IsTailCall)

1096 return false;

1097

1098

1099

1100 if (Info.Callee.isReg())

1101 return false;

1102

1107

1109 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

1110

1111

1112 if (!CallerPreserved)

1113 return false;

1114

1116 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");

1117 return false;

1118 }

1119

1121 return A.hasByValAttr() || A.hasSwiftErrorAttr();

1122 })) {

1123 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "

1124 "or swifterror arguments\n");

1125 return false;

1126 }

1127

1128

1131

1132

1133

1137 << "... Caller and callee have incompatible calling conventions.\n");

1138 return false;

1139 }

1140

1141

1142

1143

1145 return false;

1146

1147 LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");

1148 return true;

1149}

1150

1151

1152

1153

1158 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {

1159 if (!ST.enableFlatScratch()) {

1160

1161

1164

1166 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51

1167 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

1168

1169 MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);

1171 }

1172

1173 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {

1176 }

1177}

1178

1188

1189

1191

1192

1196 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

1197

1199 if (!IsSibCall)

1200 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);

1201

1202 unsigned Opc =

1203 getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);

1206 return false;

1207

1208

1209

1211

1212

1216 assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");

1217

1218 if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize()))

1219 return false;

1220

1221 if (const auto *CI = dyn_cast<ConstantInt>(ExecArg.OrigValue)) {

1222 MIB.addImm(CI->getSExtValue());

1223 } else {

1224 MIB.addReg(ExecArg.Regs[0]);

1225 unsigned Idx = MIB->getNumOperands() - 1;

1227 MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,

1228 MIB->getDesc(), MIB->getOperand(Idx), Idx));

1229 }

1230 }

1231

1232

1233 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);

1234 MIB.addRegMask(Mask);

1235

1236

1237

1238

1239

1240

1241 int FPDiff = 0;

1242

1243

1244

1245

1246 unsigned NumBytes = 0;

1247 if (!IsSibCall) {

1248

1249

1250

1251 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();

1253 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

1254

1255

1258 return false;

1259

1260

1261

1263

1264

1265

1266

1267 FPDiff = NumReusableBytes - NumBytes;

1268

1269

1270

1271

1272

1273

1275 "unaligned stack on tail call");

1276 }

1277

1279 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

1280

1281

1282

1283

1285

1288

1290 return false;

1291 }

1292

1294

1296 return false;

1297

1298

1299 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);

1300 if (handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))

1301 return false;

1302

1303 if (Info.ConvergenceCtrlToken) {

1305 }

1307 ImplicitArgRegs);

1308

1309

1310

1311 if (!IsSibCall) {

1312 MIB->getOperand(1).setImm(FPDiff);

1314

1315

1316

1317

1319 }

1320

1321

1323

1324

1325

1326

1327

1328

1329

1330 if (MIB->getOperand(0).isReg()) {

1332 MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB,

1333 MIB->getDesc(), MIB->getOperand(0), 0));

1334 }

1335

1337 Info.LoweredTailCall = true;

1338 return true;

1339}
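For a non-sibling tail call, the lowering above measures the callee's outgoing stack bytes and computes FPDiff, the delta between the caller's reusable incoming-argument area and what the callee needs; the delta must keep the stack aligned. A runnable sketch of that computation, with an illustrative 16-byte stack alignment and a hypothetical helper name:

#include <cassert>
#include <cstdint>

// Standalone sketch: FPDiff is the adjustment a tail call applies so the
// callee's outgoing arguments fit in (or extend) the caller's incoming area.
int64_t computeFPDiffSketch(uint64_t NumReusableBytes, uint64_t NumBytes,
                            uint64_t StackAlign = 16 /* illustrative */) {
  int64_t FPDiff = static_cast<int64_t>(NumReusableBytes) -
                   static_cast<int64_t>(NumBytes);
  // A negative FPDiff means the callee needs more stack than the caller's
  // incoming argument area provides. Mirrors the listing's alignment assert.
  assert(FPDiff % static_cast<int64_t>(StackAlign) == 0 &&
         "unaligned stack on tail call");
  return FPDiff;
}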

1340

1341

1348

1349 assert(cast<ConstantInt>(Flags.OrigValue)->isZero() &&

1350 "Non-zero flags aren't supported yet.");

1351 assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet.");

1352

1356

1357

1358

1359 const Value *CalleeV = Callee.OrigValue->stripPointerCasts();

1360 if (const Function *F = dyn_cast<Function>(CalleeV)) {

1362 Info.CallConv = F->getCallingConv();

1363 } else {

1364 assert(Callee.Regs.size() == 1 && "Too many regs for the callee");

1367

1368 }

1369

1370

1371 Info.IsVarArg = false;

1372

1375 "SGPR arguments should be marked inreg");

1378 "VGPR arguments should not be marked inreg");

1379

1383

1384 Info.IsMustTailCall = true;

1386}

1387

1391 if (F->isIntrinsic()) {

1392 assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&

1393 "Unexpected intrinsic");

1395 }

1396

1397 if (Info.IsVarArg) {

1398 LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");

1399 return false;

1400 }

1401

1405

1410

1412 for (auto &OrigArg : Info.OrigArgs)

1414

1416 if (Info.CanLowerReturn && Info.OrigRet.Ty->isVoidTy())

1418

1419

1420 bool CanTailCallOpt =

1422

1423

1424 if (Info.IsMustTailCall && !CanTailCallOpt) {

1425 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");

1426 return false;

1427 }

1428

1429 Info.IsTailCall = CanTailCallOpt;

1430 if (CanTailCallOpt)

1432

1433

1436 std::tie(AssignFnFixed, AssignFnVarArg) =

1438

1439 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)

1442

1443

1444

1445 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),

1446 Info.CallConv);

1447

1449 MIB.addDef(TRI->getReturnAddressReg(MF));

1450

1451 if (Info.IsConvergent)

1453

1455 return false;

1456

1457

1458 const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);

1459 MIB.addRegMask(Mask);

1460

1462 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

1463

1464

1465

1466

1468

1470

1472 return false;

1473 }

1474

1475

1477

1480 return false;

1481

1482 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);

1483 if (handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))

1484 return false;

1485

1487

1488 if (Info.ConvergenceCtrlToken) {

1490 }

1492 ImplicitArgRegs);

1493

1494

1496

1497

1498

1499

1500

1501

1502

1503 if (MIB->getOperand(1).isReg()) {

1505 MF, *TRI, MRI, *ST.getInstrInfo(),

1506 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),

1507 1));

1508 }

1509

1510

1512

1513

1514

1515

1516 if (Info.CanLowerReturn && Info.OrigRet.Ty->isVoidTy()) {

1518 Info.IsVarArg);

1520 CallReturnHandler Handler(MIRBuilder, MRI, MIB);

1522 Info.CallConv, Info.IsVarArg))

1523 return false;

1524 }

1525

1526 uint64_t CalleePopBytes = NumBytes;

1527

1528 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)

1530 .addImm(CalleePopBytes);

1531

1532 if (Info.CanLowerReturn) {

1534 Info.DemoteRegister, Info.DemoteStackIndex);

1535 }

1536

1537 return true;

1538}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, bool isWave32, CallingConv::ID CC)

static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI)

Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.

static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)

Return true if the calling convention is one that we can guarantee TCO for.

static bool mayTailCallThisCC(CallingConv::ID CC)

Return true if we might ever do TCO for calls with this calling convention.

static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info)

static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)

static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)

This file describes how to lower LLVM calls to machine code calls.

This file declares the targeting of the Machinelegalizer class for AMDGPU.

Analysis containing CSE Info

This file declares the MachineIRBuilder class.

Interface definition for SIRegisterInfo.

static const AMDGPUFunctionArgInfo FixedABIFunctionInfo

bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const

bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const

Returns true if the call can be lowered as a tail call.

bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs) const

bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override

This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...

void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, CallingConv::ID CalleeCC, ArrayRef< std::pair< MCRegister, Register > > ImplicitArgRegs) const

bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const

bool lowerChainCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const

Lower a call to the @llvm.amdgcn.cs.chain intrinsic.

AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)

bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register > > &ArgRegs, CallLoweringInfo &Info) const

bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs, FunctionLoweringInfo &FLI) const override

This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...

bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override

This hook must be implemented to lower the given call instruction, including argument and return valu...

bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const

static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)

unsigned getExplicitKernelArgOffset() const

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override

Return the type that should be used to zero or sign extend a zeroext/signext integer return value.

static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)

static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Selects the correct CCAssignFn for a given CallingConvention value.

This class represents an incoming formal argument to a Function.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

CCState - This class holds information needed while lowering arguments and return values.

MCRegister AllocateReg(MCPhysReg Reg)

AllocateReg - Attempt to allocate one register.

uint64_t getStackSize() const

Returns the size of the currently allocated portion of the stack.

CCValAssign - Represent assignment of one arg/retval to a location.

LocInfo getLocInfo() const

int64_t getLocMemOffset() const

This class represents a function call, abstracting a target machine's calling convention.

void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const

Load the returned value from the stack into virtual registers in VRegs.

bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs={}) const

Use Handler to insert code to handle the argument/return values represented by Args.

bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const

void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const

Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.

void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const

Insert the hidden sret ArgInfo to the beginning of SplitArgs.

bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs={}) const

Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...

void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const

Store the return value given by VRegs into stack starting at the offset specified in DemoteReg.

bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const

Check whether parameters to a call that are passed in callee saved registers are the same as from the...

bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const

Analyze the argument list in Args, using Assigner to populate CCInfo.

bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const

void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const

A parsed version of the target data layout string in and methods for querying it.

FunctionLoweringInfo - This contains information that is global to a function that is used when lower...

iterator_range< arg_iterator > args()

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

const SIRegisterInfo * getRegisterInfo() const override

bool hasKernargSegmentPtr() const

bool hasDispatchID() const

bool hasPrivateSegmentBuffer() const

bool hasImplicitBufferPtr() const

bool hasDispatchPtr() const

bool hasFlatScratchInit() const

unsigned getAddressSpace() const

constexpr unsigned getScalarSizeInBits() const

static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)

Get a low-level vector of some number of elements and element width.

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

constexpr bool isVector() const

static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)

Get a low-level pointer in the given address space.

constexpr TypeSize getSizeInBits() const

Returns the total size of the type. Must only be called on sized types.

constexpr bool isPointer() const

constexpr ElementCount getElementCount() const

static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)

Get a low-level fixed-width vector of some number of elements and element width.

This is an important class for using LLVM in a threaded context.

TypeSize getSizeInBits() const

Returns the size of the specified MVT in bits.

int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)

Create a new object at a fixed location on the stack.

void setHasTailCall(bool V=true)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const DataLayout & getDataLayout() const

Return the DataLayout attached to the Module associated to this MF.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)

addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Helper class to build MachineInstr.

MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)

Insert an existing instruction at the insertion point.

MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)

Build and insert Res = G_GLOBAL_VALUE GV.

MachineInstrBuilder buildUndef(const DstOp &Res)

Build and insert Res = IMPLICIT_DEF.

MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)

Build and insert Res = G_PTR_ADD Op0, Op1.

MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)

MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)

Build and insert G_STORE Val, Addr, MMO.

MachineInstrBuilder buildInstr(unsigned Opcode)

Build and insert <empty> = Opcode <empty>.

MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)

Build and insert Res = G_FRAME_INDEX Idx.

MachineFunction & getMF()

Getter for the function we currently build.

MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)

Build and insert Res = G_ANYEXT Op0.

MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)

Build and insert Res = G_OR Op0, Op1.

MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)

Build but don't insert <empty> = Opcode <empty>.

MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)

Build and insert Res = COPY Op.

virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)

Build and insert Res = G_CONSTANT Val.

Register getReg(unsigned Idx) const

Get the register for the operand index.

const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register use operand.

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

@ MODereferenceable

The memory access is dereferenceable (i.e., doesn't trap).

@ MOLoad

The memory access reads data.

@ MOInvariant

The memory access always returns the same value (or traps).

@ MOStore

The memory access writes data.

void setReg(Register Reg)

Change the register this operand corresponds to.

static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

Register getStackPtrOffsetReg() const

Register getScratchRSrcReg() const

Returns the physical register reserved for use as the resource descriptor for scratch accesses.

unsigned getBytesInStackArgArea() const

void setIfReturnsVoid(bool Value)

MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const

AMDGPUFunctionArgInfo & getArgInfo()

void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const

void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const

Allocate implicit function VGPR arguments in fixed registers.

void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const

void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

unsigned GuaranteedTailCallOpt

GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isIntegerTy() const

True if this is an instance of IntegerType.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ PRIVATE_ADDRESS

Address space for private memory.

bool isEntryFunctionCC(CallingConv::ID CC)

LLVM_READNONE bool isKernel(CallingConv::ID CC)

bool isChainCC(CallingConv::ID CC)

bool isShader(CallingConv::ID cc)

bool isGraphics(CallingConv::ID cc)

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ Fast

Attempts to make calls as fast as possible (e.g.

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

NodeType

ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.

@ ANY_EXTEND

ANY_EXTEND - Used for integer types. The high bits are undefined.

@ SIGN_EXTEND

Conversion operators.

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)

Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

bool isAligned(Align Lhs, uint64_t SizeInBytes)

Checks that SizeInBytes is a multiple of the alignment.

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)

CCAssignFn - This function assigns a location for Val, updating State to reflect the change.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())

ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

LLT getLLTForType(Type &Ty, const DataLayout &DL)

Construct a low-level type based on an LLVM type.

Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)

ArgDescriptor WorkItemIDZ

ArgDescriptor WorkItemIDY

std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const

ArgDescriptor WorkItemIDX

This struct is a compact representation of a valid (non-zero power of two) alignment.

MCRegister getRegister() const

static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)

Helper struct shared between Function Specialization and SCCP Solver.

const Value * OrigValue

Optionally track the original IR value for the argument.

SmallVector< Register, 4 > Regs

SmallVector< ISD::ArgFlagsTy, 4 > Flags

Base class for ValueHandlers used for arguments coming into the current function, or for return value...

void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA) override

Provides a default implementation for argument handling.

Register buildExtensionHint(const CCValAssign &VA, Register SrcReg, LLT NarrowTy)

Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on VA, returning the new register ...

Base class for ValueHandlers used for arguments passed to a function call, or for return values.

uint64_t StackSize

The size of the currently allocated portion of the stack.

MachineIRBuilder & MIRBuilder

virtual Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags)=0

Materialize a VReg containing the address of the specified stack-based object.

virtual void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO, const CCValAssign &VA)=0

The specified value has been assigned to a stack location.

Register extendRegister(Register ValReg, const CCValAssign &VA, unsigned MaxSizeBits=0)

Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.

virtual void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA)=0

The specified value has been assigned to a physical register, handle the appropriate COPY (either to ...

Type * getTypeForEVT(LLVMContext &Context) const

This method returns an LLVM type corresponding to the specified EVT.

bool isScalarInteger() const

Return true if this is an integer, but not a vector.

This class contains a discriminated union of information about pointers in memory operands,...

static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)

Stack pointer relative access.

static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)

Return a MachinePointerInfo record that refers to the specified FrameIndex.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.