LLVM: lib/Target/AMDGPU/AMDGPUCallLowering.cpp Source File

1-24: standard LLVM file banner, license header, and leading #includes (content not captured in this extraction)
25#include "llvm/IR/IntrinsicsAMDGPU.h"

26

27#define DEBUG_TYPE "amdgpu-call-lowering"

28

29using namespace llvm;

30

31namespace {

32

33

37

38

40 }

41

43}

44

48 : OutgoingValueHandler(B, MRI), MIB(MIB) {}

49

51

56 }

57

62 }

63

66 Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

67

68

69

70

72 = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

73 if (TRI->isSGPRReg(MRI, PhysReg)) {

74 LLT Ty = MRI.getType(ExtReg);

76 if (Ty != S32) {

77

78

79 assert(Ty.getSizeInBits() == 32);

80 if (Ty.isPointer())

81 ExtReg = MIRBuilder.buildPtrToInt(S32, ExtReg).getReg(0);

82 else

83 ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);

84 }

85

86 auto ToSGPR = MIRBuilder

87 .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,

88 {MRI.getType(ExtReg)})

89 .addReg(ExtReg);

90 ExtReg = ToSGPR.getReg(0);

91 }

92

93 MIRBuilder.buildCopy(PhysReg, ExtReg);

95 }

96};

97

100

102 : IncomingValueHandler(B, MRI) {}

103

107 auto &MFI = MIRBuilder.getMF().getFrameInfo();

108

109

110

111 const bool IsImmutable = !Flags.isByVal();

112 int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);

114 auto AddrReg = MIRBuilder.buildFrameIndex(

116 StackUsed = std::max(StackUsed, Size + Offset);

117 return AddrReg.getReg(0);

118 }

119

122 markPhysRegUsed(PhysReg);

123

125

126

127 auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

128

129

130

131 auto Extended =

132 buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));

133 MIRBuilder.buildTrunc(ValVReg, Extended);

134 return;

135 }

136

137 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);

138 }

139

144

148 MIRBuilder.buildLoad(ValVReg, Addr, *MMO);

149 }

150

151

152

153

154 virtual void markPhysRegUsed(unsigned PhysReg) = 0;

155};

156

157struct FormalArgHandler : public AMDGPUIncomingArgHandler {

159 : AMDGPUIncomingArgHandler(B, MRI) {}

160

161 void markPhysRegUsed(unsigned PhysReg) override {

162 MIRBuilder.getMBB().addLiveIn(PhysReg);

163 }

164};

165

166struct CallReturnHandler : public AMDGPUIncomingArgHandler {

169 : AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

170

171 void markPhysRegUsed(unsigned PhysReg) override {

173 }

174

176};

177

178struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {

179

180

181 int FPDiff;

182

183

185

186 bool IsTailCall;

187

190 bool IsTailCall = false, int FPDiff = 0)

191 : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),

192 IsTailCall(IsTailCall) {}

193

200

201 if (IsTailCall) {

206 return FIReg.getReg(0);

207 }

208

210

213 if (ST.enableFlatScratch()) {

214

217 } else {

218

219

220

221 SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},

223 }

224 }

225

227

228 auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);

230 return AddrReg.getReg(0);

231 }

232

239

243 MIRBuilder.buildStore(ValVReg, Addr, *MMO);

244 }

245

247 unsigned ValRegIndex, Register Addr, LLT MemTy,

251 ? extendRegister(Arg.Regs[ValRegIndex], VA)

252 : Arg.Regs[ValRegIndex];

253 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);

254 }

255};

256}

257

261

262

264 switch (MIOpc) {

265 case TargetOpcode::G_SEXT:

267 case TargetOpcode::G_ZEXT:

269 case TargetOpcode::G_ANYEXT:

271 default:

273 }

274}

275

276bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,

279 bool IsVarArg) const {

280

282 return true;

283

286 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,

288

289 return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));

290}

291

292

293

297 if (!Val)

298 return true;

299

300 auto &MF = B.getMF();

303 MachineRegisterInfo *MRI = B.getMRI();

304 LLVMContext &Ctx = F.getContext();

305

308

312 "For each split Type there should be exactly one VReg.");

313

315

316 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {

317 EVT VT = SplitEVTs[i];

320 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);

321

323 unsigned ExtendOp = TargetOpcode::G_ANYEXT;

324 if (RetInfo.Flags[0].isSExt()) {

325 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");

326 ExtendOp = TargetOpcode::G_SEXT;

327 } else if (RetInfo.Flags[0].isZExt()) {

328 assert(RetInfo.Regs.size() == 1 && "expect only simple return values");

329 ExtendOp = TargetOpcode::G_ZEXT;

330 }

331

332 EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,

334 if (ExtVT != VT) {

337 Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);

338 }

339 }

340

341 if (Reg != RetInfo.Regs[0]) {

342 RetInfo.Regs[0] = Reg;

343

344 setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);

345 }

346

348 }

349

350 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());

351

353 AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);

355 CC, F.isVarArg());

356}

357

361

365

366 assert(!Val == VRegs.empty() && "Return value without a vreg");

367

368 CallingConv::ID CC = B.getMF().getFunction().getCallingConv();

370 const bool IsWaveEnd =

372 if (IsWaveEnd) {

373 B.buildInstr(AMDGPU::S_ENDPGM)

374 .addImm(0);

375 return true;

376 }

377

379 unsigned ReturnOpc = IsWholeWave ? AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN

380 : IsShader ? AMDGPU::SI_RETURN_TO_EPILOG

381 : AMDGPU::SI_RETURN;

382 auto Ret = B.buildInstrNoInsert(ReturnOpc);

383

386 else if (!lowerReturnVal(B, Val, VRegs, Ret))

387 return false;

388

389 if (IsWholeWave)

390 addOriginalExecToReturn(B.getMF(), Ret);

391

392

393

394 B.insertInstr(Ret);

395 return true;

396}

397

405 Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

406

408

409 B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);

410}

411

414 Align Alignment) const {

420

422

425 splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);

426

427 unsigned Idx = 0;

428 for (ArgInfo &SplitArg : SplitArgs) {

429 Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);

430 lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

431

433 if (SplitArg.Flags[0].isPointer()) {

434

435 LLT PtrTy = LLT::pointer(SplitArg.Flags[0].getPointerAddrSpace(),

438 : PtrTy;

439 }

440

442 PtrInfo,

446

447 assert(SplitArg.Regs.size() == 1);

448

449 B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);

450 ++Idx;

451 }

452}

453

454

460

463 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);

464 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);

465 CCInfo.AllocateReg(PrivateSegmentBufferReg);

466 }

467

470 MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);

472 }

473

476 MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);

478 }

479

484 Register VReg = MRI.createGenericVirtualRegister(P4);

485 MRI.addLiveIn(InputPtrReg, VReg);

486 B.getMBB().addLiveIn(InputPtrReg);

487 B.buildCopy(VReg, InputPtrReg);

489 }

490

493 MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);

495 }

496

498 Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);

499 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);

501 }

502

504 Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);

505 MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);

506 CCInfo.AllocateReg(PrivateSegmentSizeReg);

507 }

508

509

510

511}

512

523

525 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

526

528

529 unsigned i = 0;

530 const Align KernArgBaseAlign(16);

532 uint64_t ExplicitArgOffset = 0;

533

534

535 for (auto &Arg : F.args()) {

536

537 if (Arg.hasAttribute("amdgpu-hidden-argument")) {

538 LLVM_DEBUG(dbgs() << "Preloading hidden arguments is not supported\n");

539 return false;

540 }

541

542 const bool IsByRef = Arg.hasByRefAttr();

543 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();

544 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

545 if (AllocSize == 0)

546 continue;

547

548 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;

549 Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

550

551 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;

552 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

553

554 if (Arg.use_empty()) {

555 ++i;

556 continue;

557 }

558

560

561 if (IsByRef) {

562 unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

563

565 "expected only one register for byval pointers");

567 lowerParameterPtr(VRegs[i][0], B, ArgOffset);

568 } else {

570 Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);

571 lowerParameterPtr(PtrReg, B, ArgOffset);

572

573 B.buildAddrSpaceCast(VRegs[i][0], PtrReg);

574 }

575 } else {

576 ArgInfo OrigArg(VRegs[i], Arg, i);

577 const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;

579 lowerParameter(B, OrigArg, ArgOffset, Alignment);

580 }

581

582 ++i;

583 }

584

585 if (Info->getNumKernargPreloadedSGPRs())

586 Info->setNumWaveDispatchSGPRs(Info->getNumUserSGPRs());

587

588 TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);

589 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);

590 return true;

591}

592

597

598

599

600

603

606

614

616 CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

618

620 Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);

621 MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);

622 CCInfo.AllocateReg(ImplicitBufferPtrReg);

623 }

624

625

627 Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);

628 MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);

630 }

631

633 unsigned Idx = 0;

634 unsigned PSInputNum = 0;

635

636

637

640

641 for (auto &Arg : F.args()) {

642 if (DL.getTypeStoreSize(Arg.getType()) == 0)

643 continue;

644

645 if (Info->isWholeWaveFunction() && Idx == 0) {

646 assert(VRegs[Idx].size() == 1 && "Expected only one register");

647

648

649 B.buildInstr(AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)

650 .addDef(VRegs[Idx][0]);

651

652 ++Idx;

653 continue;

654 }

655

656 const bool InReg = Arg.hasAttribute(Attribute::InReg);

657

658 if (Arg.hasAttribute(Attribute::SwiftSelf) ||

659 Arg.hasAttribute(Attribute::SwiftError) ||

660 Arg.hasAttribute(Attribute::Nest))

661 return false;

662

664 const bool ArgUsed = !Arg.use_empty();

665 bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

666

667 if (!SkipArg) {

668 Info->markPSInputAllocated(PSInputNum);

669 if (ArgUsed)

670 Info->markPSInputEnabled(PSInputNum);

671 }

672

673 ++PSInputNum;

674

675 if (SkipArg) {

676 for (Register R : VRegs[Idx])

677 B.buildUndef(R);

678

679 ++Idx;

680 continue;

681 }

682 }

683

684 ArgInfo OrigArg(VRegs[Idx], Arg, Idx);

685 const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;

687

689 ++Idx;

690 }

691

692

693

694

695

696

697

698

699

700

701

702

703

704

706 if ((Info->getPSInputAddr() & 0x7F) == 0 ||

707 ((Info->getPSInputAddr() & 0xF) == 0 &&

708 Info->isPSInputAllocated(11))) {

711 Info->markPSInputAllocated(0);

712 Info->markPSInputEnabled(0);

713 }

714

715 if (Subtarget.isAmdPalOS()) {

716

717

718

719

720

721

722

723

724 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();

725 if ((PsInputBits & 0x7F) == 0 ||

726 ((PsInputBits & 0xF) == 0 &&

727 (PsInputBits >> 11 & 1)))

728 Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));

729 }

730 }

731

733 CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

734

735 if (MBB.empty())

736 B.setInstr(*MBB.begin());

737

738 if (!IsEntryFunc && !IsGraphics) {

739

740 TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);

741

742 if (!Subtarget.enableFlatScratch())

743 CCInfo.AllocateReg(Info->getScratchRSrcReg());

744 TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);

745 }

746

749 return false;

750

751 if (IsEntryFunc) {

752

753

754 Info->setNumWaveDispatchSGPRs(

756 Info->setNumWaveDispatchVGPRs(

758 }

759

762 return false;

763

765

766

767 if (IsEntryFunc)

768 TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);

769

770

771

772

773

774 Info->setBytesInStackArgArea(StackSize);

775

776

777 B.setMBB(MBB);

778

779 return true;

780}

781

784 SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,

787

788

789

790 if (!Info.CB)

791 return true;

792

795

798

799

800

801

802

812 };

813

814 static constexpr StringLiteral ImplicitAttrNames[][2] = {

815 {"amdgpu-no-dispatch-ptr", ""},

816 {"amdgpu-no-queue-ptr", ""},

817 {"amdgpu-no-implicitarg-ptr", ""},

818 {"amdgpu-no-dispatch-id", ""},

819 {"amdgpu-no-workgroup-id-x", "amdgpu-no-cluster-id-x"},

820 {"amdgpu-no-workgroup-id-y", "amdgpu-no-cluster-id-y"},

821 {"amdgpu-no-workgroup-id-z", "amdgpu-no-cluster-id-z"},

822 {"amdgpu-no-lds-kernel-id", ""},

823 };

824

826

830

831 unsigned I = 0;

832 for (auto InputID : InputRegs) {

835 LLT ArgTy;

836

837

839 return AttrName.empty() || Info.CB->hasFnAttr(AttrName);

840 }))

841 continue;

842

843 std::tie(OutgoingArg, ArgRC, ArgTy) =

845 if (!OutgoingArg)

846 continue;

847

850 std::tie(IncomingArg, IncomingArgRC, ArgTy) =

851 CallerArgInfo.getPreloadedValue(InputID);

852 assert(IncomingArgRC == ArgRC);

853

854 Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

855

856 if (IncomingArg) {

857 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);

859 LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);

861 std::optional<uint32_t> Id =

863 if (Id) {

865 } else {

867 }

868 } else {

869

870

872 }

873

875 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

878 } else {

879 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");

880 return false;

881 }

882 }

883

884

885

888 LLT ArgTy;

889

890 std::tie(OutgoingArg, ArgRC, ArgTy) =

892 if (!OutgoingArg)

893 std::tie(OutgoingArg, ArgRC, ArgTy) =

895 if (!OutgoingArg)

896 std::tie(OutgoingArg, ArgRC, ArgTy) =

898 if (!OutgoingArg)

899 return false;

900

901 auto WorkitemIDX =

903 auto WorkitemIDY =

905 auto WorkitemIDZ =

907

908 const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);

909 const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);

910 const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);

912

913 const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");

914 const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");

915 const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");

916

917

918

920 if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&

921 NeedWorkItemIDX) {

922 if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {

923 InputReg = MRI.createGenericVirtualRegister(S32);

924 LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArgX,

925 std::get<1>(WorkitemIDX),

926 std::get<2>(WorkitemIDX));

927 } else {

929 }

930 }

931

932 if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&

933 NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {

935 LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY,

936 std::get<1>(WorkitemIDY), std::get<2>(WorkitemIDY));

937

939 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;

940 }

941

942 if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&

943 NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {

945 LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ,

946 std::get<1>(WorkitemIDZ), std::get<2>(WorkitemIDZ));

947

949 InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;

950 }

951

952 if (!InputReg &&

953 (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {

954 InputReg = MRI.createGenericVirtualRegister(S32);

955 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {

956

957

958

959

961 } else {

962

963

965 IncomingArgX ? *IncomingArgX :

966 IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);

967 LI->buildLoadInputValue(InputReg, MIRBuilder, &IncomingArg,

968 &AMDGPU::VGPR_32RegClass, S32);

969 }

970 }

971

973 if (InputReg)

974 ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);

975

978 } else {

979 LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");

980 return false;

981 }

982

983 return true;

984}

985

986

987

988static std::pair<CCAssignFn *, CCAssignFn *>

992

994 bool IsTailCall, bool IsWave32,

996 bool IsDynamicVGPRChainCall = false) {

997

999 "Indirect calls can't be tail calls, "

1000 "because the address can be divergent");

1001 if (!IsTailCall)

1002 return AMDGPU::G_SI_CALL;

1003

1005 if (IsDynamicVGPRChainCall)

1006 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR

1007 : AMDGPU::SI_CS_CHAIN_TC_W64_DVGPR;

1008 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;

1009 }

1010

1013 return AMDGPU::SI_TCRETURN_GFX_WholeWave;

1014

1016 return AMDGPU::SI_TCRETURN_GFX;

1017

1018 return AMDGPU::SI_TCRETURN;

1019}

1020

1021

1025 bool IsDynamicVGPRChainCall = false) {

1026 if (Info.Callee.isReg()) {

1029 } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {

1030

1031

1035 CallInst.addReg(Ptr.getReg(0));

1036

1037 if (IsDynamicVGPRChainCall) {

1038

1040 } else

1042 } else

1043 return false;

1044

1045 return true;

1046}

1047

1054

1055

1056 if (CalleeCC == CallerCC)

1057 return true;

1058

1060

1061

1062 const auto *TRI = ST.getRegisterInfo();

1063

1064 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

1065 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);

1066 if (TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))

1067 return false;

1068

1069

1073 std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =

1075

1078 std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =

1080

1081

1082

1084 CalleeAssignFnVarArg);

1086 CallerAssignFnVarArg);

1087 return resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner);

1088}

1089

1093

1094 if (OutArgs.empty())

1095 return true;

1096

1101

1104 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

1105

1106

1108 CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());

1110

1112 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");

1113 return false;

1114 }

1115

1116

1119 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");

1120 return false;

1121 }

1122

1123

1126 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);

1129}

1130

1134

1135 if (!Info.IsTailCall)

1136 return false;

1137

1138

1139

1140 if (Info.Callee.isReg())

1141 return false;

1142

1147

1149 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

1150

1151

1152 if (!CallerPreserved)

1153 return false;

1154

1156 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");

1157 return false;

1158 }

1159

1161 return A.hasByValAttr() || A.hasSwiftErrorAttr();

1162 })) {

1163 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval "

1164 "or swifterror arguments\n");

1165 return false;

1166 }

1167

1168

1172 }

1173

1174

1175

1179 << "... Caller and callee have incompatible calling conventions.\n");

1180 return false;

1181 }

1182

1183

1184

1185

1187 return false;

1188

1189 LLVM_DEBUG(dbgs() << "... Call is eligible for tail call optimization.\n");

1190 return true;

1191}

1192

1193

1194

1195

1200 ArrayRef<std::pair<MCRegister, Register>> ImplicitArgRegs) const {

1201 if (!ST.enableFlatScratch()) {

1202

1203

1206

1208 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51

1209 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

1210

1211 MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg);

1213 }

1214

1215 for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {

1218 }

1219}

1220

1221namespace {

1222

1223

1224enum ChainCallArgIdx {

1225 Exec = 1,

1226 Flags = 4,

1227 NumVGPRs = 5,

1228 FallbackExec = 6,

1229 FallbackCallee = 7,

1230};

1231}

1232

1244

1245

1247

1248

1252 std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

1253

1255 if (!IsSibCall)

1256 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);

1257

1259 bool IsDynamicVGPRChainCall = false;

1260

1261 if (IsChainCall) {

1262 ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];

1264 if (FlagsValue.isZero()) {

1265 if (Info.OrigArgs.size() != 5) {

1266 LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0\n");

1267 return false;

1268 }

1270 IsDynamicVGPRChainCall = true;

1271

1272 if (Info.OrigArgs.size() != 8) {

1273 LLVM_DEBUG(dbgs() << "Expected 3 additional args\n");

1274 return false;

1275 }

1276

1277

1278 if (!ST.isWave32()) {

1280 F, "dynamic VGPR mode is only supported for wave32"));

1281 return false;

1282 }

1283

1284 ArgInfo FallbackExecArg = Info.OrigArgs[ChainCallArgIdx::FallbackExec];

1285 assert(FallbackExecArg.Regs.size() == 1 &&

1286 "Expected single register for fallback EXEC");

1287 if (!FallbackExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {

1288 LLVM_DEBUG(dbgs() << "Bad type for fallback EXEC\n");

1289 return false;

1290 }

1291 }

1292 }

1293

1294 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true,

1295 ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);

1297

1298 if (FuncInfo->isWholeWaveFunction())

1299 addOriginalExecToReturn(MF, MIB);

1300

1301

1302 unsigned CalleeIdx = MIB->getNumOperands();

1303

1305 return false;

1306

1307

1308

1309 MIB.addImm(0);

1310

1311

1312

1313 if (IsChainCall) {

1314 auto AddRegOrImm = [&](const ArgInfo &Arg) {

1316 MIB.addImm(CI->getSExtValue());

1317 } else {

1318 MIB.addReg(Arg.Regs[0]);

1319 unsigned Idx = MIB->getNumOperands() - 1;

1321 MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),

1322 MIB->getOperand(Idx), Idx));

1323 }

1324 };

1325

1326 ArgInfo ExecArg = Info.OrigArgs[ChainCallArgIdx::Exec];

1327 assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC");

1328

1329 if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {

1331 return false;

1332 }

1333

1334 AddRegOrImm(ExecArg);

1335 if (IsDynamicVGPRChainCall)

1336 std::for_each(Info.OrigArgs.begin() + ChainCallArgIdx::NumVGPRs,

1337 Info.OrigArgs.end(), AddRegOrImm);

1338 }

1339

1340

1341 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);

1342 MIB.addRegMask(Mask);

1343

1344

1345

1346

1347

1348

1349 int FPDiff = 0;

1350

1351

1352

1353

1354 unsigned NumBytes = 0;

1355 if (!IsSibCall) {

1356

1357

1358

1359 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();

1361 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

1362

1363

1366 return false;

1367

1368

1369

1371

1372

1373

1374

1375 FPDiff = NumReusableBytes - NumBytes;

1376

1377

1378

1379

1380

1381

1383 "unaligned stack on tail call");

1384 }

1385

1387 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

1388

1389

1390

1391

1393

1397

1398 if (passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))

1399 return false;

1400 }

1401

1403

1405 return false;

1406

1407

1408 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, true, FPDiff);

1409 if (handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))

1410 return false;

1411

1412 if (Info.ConvergenceCtrlToken) {

1414 }

1416 ImplicitArgRegs);

1417

1418

1419

1420 if (!IsSibCall) {

1421 MIB->getOperand(CalleeIdx + 1).setImm(FPDiff);

1423

1424

1425

1426

1428 }

1429

1430

1432

1433

1434

1435 if (MIB->getOpcode() == AMDGPU::SI_TCRETURN_GFX_WholeWave) {

1436 MIB->getOperand(0).setReg(

1438 *MIB, MIB->getDesc(), MIB->getOperand(0), 0));

1439 }

1440

1441

1442

1443

1444

1445

1446

1447 if (MIB->getOperand(CalleeIdx).isReg()) {

1449 MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),

1450 MIB->getOperand(CalleeIdx), CalleeIdx));

1451 }

1452

1454 Info.LoweredTailCall = true;

1455 return true;

1456}

1457

1458

1461 ArgInfo Callee = Info.OrigArgs[0];

1462 ArgInfo SGPRArgs = Info.OrigArgs[2];

1463 ArgInfo VGPRArgs = Info.OrigArgs[3];

1464

1468

1469

1470

1471 const Value *CalleeV = Callee.OrigValue->stripPointerCasts();

1474 Info.CallConv = F->getCallingConv();

1475 } else {

1476 assert(Callee.Regs.size() == 1 && "Too many regs for the callee");

1479

1480 }

1481

1482

1483 Info.IsVarArg = false;

1484

1487 "SGPR arguments should be marked inreg");

1490 "VGPR arguments should not be marked inreg");

1491

1495

1496 Info.IsMustTailCall = true;

1497 return lowerTailCall(MIRBuilder, Info, OutArgs);

1498}

1499

1502 if (Function *F = Info.CB->getCalledFunction())

1503 if (F->isIntrinsic()) {

1504 switch (F->getIntrinsicID()) {

1505 case Intrinsic::amdgcn_cs_chain:

1507 case Intrinsic::amdgcn_call_whole_wave:

1509

1510

1511

1514 Info.OrigArgs.erase(Info.OrigArgs.begin());

1515 Info.IsVarArg = false;

1516 break;

1517 default:

1519 }

1520 }

1521

1522 if (Info.IsVarArg) {

1523 LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");

1524 return false;

1525 }

1526

1530

1535

1537 for (auto &OrigArg : Info.OrigArgs)

1539

1541 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())

1543

1544

1545 bool CanTailCallOpt =

1547

1548

1549 if (Info.IsMustTailCall && !CanTailCallOpt) {

1550 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");

1551 return false;

1552 }

1553

1554 Info.IsTailCall = CanTailCallOpt;

1555 if (CanTailCallOpt)

1556 return lowerTailCall(MIRBuilder, Info, OutArgs);

1557

1558

1561 std::tie(AssignFnFixed, AssignFnVarArg) =

1563

1564 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)

1567

1568

1569

1570 unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(),

1571 Info.CallConv);

1572

1574 MIB.addDef(TRI->getReturnAddressReg(MF));

1575

1576 if (!Info.IsConvergent)

1578

1580 return false;

1581

1582

1583 const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);

1584 MIB.addRegMask(Mask);

1585

1587 CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

1588

1589

1590

1591

1593

1596

1597 if (passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))

1598 return false;

1599 }

1600

1601

1604 return false;

1605

1606 AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);

1607 if (handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))

1608 return false;

1609

1611

1612 if (Info.ConvergenceCtrlToken) {

1614 }

1616 ImplicitArgRegs);

1617

1618

1620

1621

1622

1623

1624

1625

1626

1627 if (MIB->getOperand(1).isReg()) {

1629 MF, *TRI, MRI, *ST.getInstrInfo(),

1630 *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),

1631 1));

1632 }

1633

1634

1636

1637

1638

1639

1640 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {

1641 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,

1642 Info.IsVarArg);

1644 CallReturnHandler Handler(MIRBuilder, MRI, MIB);

1646 Info.CallConv, Info.IsVarArg))

1647 return false;

1648 }

1649

1650 uint64_t CalleePopBytes = NumBytes;

1651

1652 MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)

1654 .addImm(CalleePopBytes);

1655

1656 if (!Info.CanLowerReturn) {

1657 insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,

1658 Info.DemoteRegister, Info.DemoteStackIndex);

1659 }

1660

1661 return true;

1662}

1663

1664void AMDGPUCallLowering::addOriginalExecToReturn(

1668 const MachineInstr *Setup = TII->getWholeWaveFunctionSetup(MF);

1669 Ret.addReg(Setup->getOperand(0).getReg());

1670}

static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)

Definition AMDGPUCallLowering.cpp:263

static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)

Definition AMDGPUCallLowering.cpp:455

static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info, bool IsDynamicVGPRChainCall=false)

Definition AMDGPUCallLowering.cpp:1022

This file describes how to lower LLVM calls to machine code calls.

This file declares the targeting of the Machinelegalizer class for AMDGPU.

This file declares the MachineIRBuilder class.

Interface definition for SIRegisterInfo.

static const AMDGPUFunctionArgInfo FixedABIFunctionInfo

bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &OutArgs) const

Definition AMDGPUCallLowering.cpp:1233

bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl< ArgInfo > &InArgs, SmallVectorImpl< ArgInfo > &OutArgs) const

Returns true if the call can be lowered as a tail call.

Definition AMDGPUCallLowering.cpp:1131

bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs) const

Definition AMDGPUCallLowering.cpp:513

bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override

This hook behaves as the extended lowerReturn function, but for targets that do not support swifterro...

Definition AMDGPUCallLowering.cpp:358

void handleImplicitCallArguments(MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, CallingConv::ID CalleeCC, ArrayRef< std::pair< MCRegister, Register > > ImplicitArgRegs) const

Definition AMDGPUCallLowering.cpp:1196

bool areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &OutArgs) const

Definition AMDGPUCallLowering.cpp:1090

bool lowerChainCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const

Lower a call to the @llvm.amdgcn.cs.chain intrinsic.

Definition AMDGPUCallLowering.cpp:1459

AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)

Definition AMDGPUCallLowering.cpp:258

bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register > > &ArgRegs, CallLoweringInfo &Info) const

Definition AMDGPUCallLowering.cpp:782

bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register > > VRegs, FunctionLoweringInfo &FLI) const override

This hook must be implemented to lower the incoming (formal) arguments, described by VRegs,...

Definition AMDGPUCallLowering.cpp:593

bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override

This hook must be implemented to lower the given call instruction, including argument and return valu...

Definition AMDGPUCallLowering.cpp:1500

bool doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs) const

Definition AMDGPUCallLowering.cpp:1048

static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)

unsigned getExplicitKernelArgOffset() const

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Selects the correct CCAssignFn for a given CallingConvention value.

Class for arbitrary precision integers.

bool isZero() const

Determine if this value is zero, i.e. all bits are clear.

bool isOneBitSet(unsigned BitNo) const

Determine if this APInt Value only has the specified bit set.

This class represents an incoming formal argument to a Function.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

CCState - This class holds information needed while lowering arguments and return values.

unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const

getFirstUnallocated - Return the index of the first unallocated register in the set,...

MCRegister AllocateReg(MCPhysReg Reg)

AllocateReg - Attempt to allocate one register.

uint64_t getStackSize() const

Returns the size of the currently allocated portion of the stack.

CCValAssign - Represent assignment of one arg/retval to a location.

LocInfo getLocInfo() const

int64_t getLocMemOffset() const

This class represents a function call, abstracting a target machine's calling convention.

void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const

Load the returned value from the stack into virtual registers in VRegs.

bool handleAssignments(ValueHandler &Handler, SmallVectorImpl< ArgInfo > &Args, CCState &CCState, SmallVectorImpl< CCValAssign > &ArgLocs, MachineIRBuilder &MIRBuilder, ArrayRef< Register > ThisReturnRegs={}) const

Use Handler to insert code to handle the argument/return values represented by Args.

bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, SmallVectorImpl< ArgInfo > &InArgs, ValueAssigner &CalleeAssigner, ValueAssigner &CallerAssigner) const

void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv, SmallVectorImpl< uint64_t > *Offsets=nullptr) const

Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
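
A minimal sketch of how this file uses splitToValueTypes, condensed from the lowerParameter loop above (lines 425-452): each split piece gets its own pointer into the kernarg segment and one load. The MachineMemOperand (MMO) is built at lines elided from the listing and is assumed available here.

SmallVector<ArgInfo, 32> SplitArgs;
SmallVector<uint64_t, 32> FieldOffsets;
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
unsigned Idx = 0;
for (ArgInfo &SplitArg : SplitArgs) {
  // One generic pointer into the kernarg segment per split piece.
  Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
  lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
  // Load the piece; the MMO describes an invariant kernarg-segment access.
  B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
  ++Idx;
}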

void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const

Insert the hidden sret ArgInfo to the beginning of SplitArgs.

bool determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, bool IsVarArg, ArrayRef< Register > ThisReturnRegs={}) const

Invoke ValueAssigner::assignArg on each of the given Args and then use Handler to move them to the as...
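
A sketch of how the return path above (lines 350-355) wires these pieces together; the OutgoingValueAssigner line fills in content elided from the listing and is an assumption.

CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
OutgoingValueAssigner Assigner(AssignFn);   // assumed: wraps the CC assignment function
AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
                                     CC, F.isVarArg());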

void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const

Store the return value given by VRegs into stack starting at the offset specified in DemoteReg.

bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ArgInfo > &OutVals) const

Check whether parameters to a call that are passed in callee saved registers are the same as from the...

bool determineAssignments(ValueAssigner &Assigner, SmallVectorImpl< ArgInfo > &Args, CCState &CCInfo) const

Analyze the argument list in Args, using Assigner to populate CCInfo.

bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const

CallLowering(const TargetLowering *TLI)

const TargetLowering * getTLI() const

Getter for generic TargetLowering class.

void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const

A parsed version of the target data layout string in and methods for querying it.

Diagnostic information for unsupported feature in backend.

FunctionLoweringInfo - This contains information that is global to a function that is used when lower...

Register DemoteRegister

DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...

bool CanLowerReturn

CanLowerReturn - true iff the function's return value can be lowered to registers.

iterator_range< arg_iterator > args()

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

const SIRegisterInfo * getRegisterInfo() const override

bool hasKernargSegmentPtr() const

bool hasDispatchID() const

bool hasPrivateSegmentBuffer() const

bool hasImplicitBufferPtr() const

bool hasPrivateSegmentSize() const

bool hasDispatchPtr() const

bool hasFlatScratchInit() const

unsigned getAddressSpace() const

constexpr unsigned getScalarSizeInBits() const

static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)

Get a low-level vector of some number of elements and element width.

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

constexpr bool isVector() const

static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)

Get a low-level pointer in the given address space.

constexpr ElementCount getElementCount() const

static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)

Get a low-level fixed-width vector of some number of elements and element width.
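
Illustrative only: the kinds of low-level types these helpers produce for the call lowering above (the address-space constant is AMDGPU's CONSTANT_ADDRESS, per the listing).

const LLT S32   = LLT::scalar(32);                              // 32-bit scalar
const LLT P4    = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); // 64-bit constant-address pointer
const LLT V2S16 = LLT::fixed_vector(2, 16);                     // packed <2 x s16> vector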

TypeSize getSizeInBits() const

Returns the size of the specified MVT in bits.

LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)

Create a new object at a fixed location on the stack.

void setHasTailCall(bool V=true)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const DataLayout & getDataLayout() const

Return the DataLayout attached to the Module associated to this MF.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)

addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Helper class to build MachineInstr.

MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)

Insert an existing instruction at the insertion point.

MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)

Build and insert Res = G_GLOBAL_VALUE GV.

MachineInstrBuilder buildUndef(const DstOp &Res)

Build and insert Res = IMPLICIT_DEF.

MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)

Build and insert Res = G_PTR_ADD Op0, Op1.
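
A sketch of lowerParameterPtr above (lines 405-410): a kernel argument's address is the kernarg segment base plus a constant byte offset. The buildConstant line is elided in the listing and assumed here.

Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset); // assumed: byte offset into the kernarg segment
B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);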

MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)

MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)

Build and insert G_STORE Val, Addr, MMO.

MachineInstrBuilder buildInstr(unsigned Opcode)

Build and insert <empty> = Opcode <empty>.

MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)

Build and insert Res = G_FRAME_INDEX Idx.

MachineFunction & getMF()

Getter for the function we currently build.

MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)

Build and insert Res = G_ANYEXT Op0.

MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)

Build and insert Res = G_OR Op0, Op1.

MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)

Build but don't insert <empty> = Opcode <empty>.

MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)

Build and insert Res = COPY Op.
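
buildCopy is the last step of the SGPR return path above (lines 73-93): a value headed for an SGPR is first made wave-uniform with readfirstlane, then copied into the physical register. Condensed from the listing:

auto ToSGPR = MIRBuilder
                  .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                  {MRI.getType(ExtReg)})
                  .addReg(ExtReg);               // make the value wave-uniform
MIRBuilder.buildCopy(PhysReg, ToSGPR.getReg(0)); // then move it into the physical SGPR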

virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)

Build and insert Res = G_CONSTANT Val.

Register getReg(unsigned Idx) const

Get the register for the operand index.

const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register use operand.

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

Representation of each machine instruction.

@ MODereferenceable

The memory access is dereferenceable (i.e., doesn't trap).

@ MOLoad

The memory access reads data.

@ MOInvariant

The memory access always returns the same value (or traps).

@ MOStore

The memory access writes data.

static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

bool isWholeWaveFunction() const

Register getStackPtrOffsetReg() const

Register getScratchRSrcReg() const

Returns the physical register reserved for use as the resource descriptor for scratch accesses.

unsigned getBytesInStackArgArea() const

void setIfReturnsVoid(bool Value)

MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const

AMDGPUFunctionArgInfo & getArgInfo()

MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

StringRef - Represent a constant reference to a string, i.e.

constexpr bool empty() const

empty - Check if the string is empty.

unsigned GuaranteedTailCallOpt

GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isIntegerTy() const

True if this is an instance of IntegerType.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ PRIVATE_ADDRESS

Address space for private memory.

LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)

LLVM_READNONE constexpr bool mayTailCallThisCC(CallingConv::ID CC)

Return true if we might ever do TCO for calls with this calling convention.

LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)

LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)

LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)

LLVM_READNONE constexpr bool canGuaranteeTCO(CallingConv::ID CC)

LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

NodeType

ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.

@ ANY_EXTEND

ANY_EXTEND - Used for integer types. The high bits are undefined.

@ SIGN_EXTEND

Conversion operators.

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)

Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())

ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

CCAssignFn - This function assigns a location for Val, updating State to reflect the change.

bool isAligned(Align Lhs, uint64_t SizeInBytes)

Checks that SizeInBytes is a multiple of the alignment.

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.
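
Worked example for the PS-input fixup above (line 728), with a hypothetical value:

unsigned PSInputAddr = 0x10;                        // hypothetical: only bit 4 set
unsigned Enabled = llvm::countr_zero(PSInputAddr);  // == 4, so PS input 4 is force-enabled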

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.
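
Worked example for the kernel-argument offsets above (lines 551-552), with assumed values: starting from ExplicitArgOffset = 4 and an 8-byte argument with 8-byte ABI alignment, the argument lands at alignTo(4, 8) + BaseOffset = 8 + BaseOffset, and the running ExplicitArgOffset advances to 8 + 8 = 16.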

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

LLVM_ABI LLT getLLTForType(Type &Ty, const DataLayout &DL)

Construct a low-level type based on an LLVM type.

LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)

ArgDescriptor WorkItemIDZ

ArgDescriptor WorkItemIDY

std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const

ArgDescriptor WorkItemIDX

This struct is a compact representation of a valid (non-zero power of two) alignment.

MCRegister getRegister() const

static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)

Helper struct shared between Function Specialization and SCCP Solver.

const Value * OrigValue

Optionally track the original IR value for the argument.

SmallVector< Register, 4 > Regs

SmallVector< ISD::ArgFlagsTy, 4 > Flags

Base class for ValueHandlers used for arguments coming into the current function, or for return value...

Base class for ValueHandlers used for arguments passed to a function call, or for return values.

uint64_t StackSize

The size of the currently allocated portion of the stack.

MachineIRBuilder & MIRBuilder

Register extendRegister(Register ValReg, const CCValAssign &VA, unsigned MaxSizeBits=0)

Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.

LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const

This method returns an LLVM type corresponding to the specified EVT.

bool isScalarInteger() const

Return true if this is an integer, but not a vector.

This class contains a discriminated union of information about pointers in memory operands,...

static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)

Stack pointer relative access.

static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)

Return a MachinePointerInfo record that refers to the specified FrameIndex.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.