LLVM: lib/Target/AMDGPU/SIFrameLowering.cpp Source File

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// ... (#include directives elided in this capture) ...

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));

static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LiveRegUnits &LiveUnits,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}
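Both helpers follow the same recipe: seed a LiveRegUnits set with everything that must survive, then scan the register class for something free. A minimal caller sketch, not part of the file, assuming a prolog insertion point; the report_fatal_error fallback mirrors how this file reacts when scavenging fails:

// Sketch only: find a free 32-bit SGPR at the top of the prolog.
LiveRegUnits Units;
Units.init(*TRI);
Units.addLiveIns(MBB); // everything live into the block is off limits
MCRegister Scratch = findScratchNonCalleeSaveRegister(
    MF.getRegInfo(), Units, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!Scratch)
  report_fatal_error("failed to find free scratch register");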

/// Query target location for spilling SGPRs.
/// \p IncludeScratchCopy : Also look for free scratch SGPRs.
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee-saved registers marked as used.
  Register ScratchSGPR;
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There was no free scratch SGPR, so spill the register into a lane
      // of a VGPR reserved for the prolog/epilog spills.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove the dead frame index created above.
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}
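The helper above encodes a three-way preference order. A compact restatement as a sketch (the save-kind names match what the spill builder below dispatches on; HaveFreeSGPR and HaveFreeVGPRLane are illustrative inputs):

enum class SGPRSaveChoice { CopyToScratchSGPR, SpillToVGPRLane, SpillToMem };

static SGPRSaveChoice chooseSGPRSave(bool HaveFreeSGPR, bool HaveFreeVGPRLane) {
  if (HaveFreeSGPR)
    return SGPRSaveChoice::CopyToScratchSGPR; // cheapest: a single s_mov_b32
  if (HaveFreeVGPRLane)
    return SGPRSaveChoice::SpillToVGPRLane;   // v_writelane/v_readlane pair
  return SGPRSaveChoice::SpillToMem;          // scratch store via a temp VGPR
}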

static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}

static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    LiveUnits.init(TRI);
    if (IsProlog) {
      LiveUnits.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveUnits.addLiveOuts(MBB);
      LiveUnits.stepBackward(*MBBI);
    }
  }
}
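The two directions need different liveness seeds: at the top of a block only the live-ins are occupied, while at an epilog point the live-outs plus whatever the terminator still reads matter. A usage sketch, illustrative only, using the LiveRegUnits API exercised above:

// Sketch: query liveness at an epilog insertion point MBBI.
LiveRegUnits Units;
Units.init(TRI);           // size the set for this target
Units.addLiveOuts(MBB);    // start from the block live-outs...
Units.stepBackward(*MBBI); // ...and step back over the return/terminator
bool SGPR4Free = Units.available(AMDGPU::SGPR4); // hypothetical query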

namespace llvm {

// A class to perform the prolog/epilog save and restore of a special SGPR
// (FP, BP, EXEC copy, ...). Each register carries a PrologEpilogSGPRSaveRestoreInfo
// that says whether it lives in a scratch SGPR, a physical VGPR lane, or a
// real stack slot, and save()/restore() dispatch on that kind.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);

      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm
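The prolog/epilog code below drives this builder in a simple loop. A sketch of the pattern, mirroring emitCSRSpillStores further down:

for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
  PrologEpilogSGPRSpillBuilder SB(Spill.first, Spill.second, MBB, MBBI, DL,
                                  TII, TRI, LiveUnits, FrameReg);
  SB.save(); // or SB.restore() in the epilog; dispatches on the save kind
}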

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`.
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I, const DebugLoc &DL,
    Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT.
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find an unused scratch register to initialize FLAT_SCRATCH with.
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor.
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
                   .addReg(FlatScrInitHi)
                   .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
                      .addReg(FlatScrInitHi)
                      .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
                    .addReg(FlatScrInitHi)
                    .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
                  .addReg(FlatScrInitLo, RegState::Kill)
                  .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}

// Note SGPRSpill stack IDs may already be reserved for spills placed in the
// prolog; treat only truly dead objects as dead here.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.
  //
  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
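With the MUBUF path, scratch addressing is swizzled per wave, so per-lane byte counts are multiplied by the wavefront size before being written into the stack pointer; flat scratch addressing is per-lane and needs no scaling. A worked example (sketch only; wave64 assumed):

uint64_t FrameBytes = 256;                  // per-lane stack size in bytes
unsigned Scale = getScratchScaleFactor(ST); // 1 for flat scratch, else 64
uint64_t SPUnits = FrameBytes * Scale;      // 256 (flat) or 16384 (MUBUF)
// SPUnits is the immediate the prologue moves into the stack pointer SGPR.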

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks.

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // The debug location must be unknown since the first instruction with a
  // debug location is used to determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the ScratchWaveOffsetReg, then we need to use a free SGPR for the
  // wave offset.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  // ... (stack pointer / frame pointer initialization and the dynamic-VGPR
  //      scratch reservation, including the isInlinableLiteral32 /
  //      hasInv2PiInlineImm special case, elided in this capture) ...

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }

  if (ST.hasWaitXCnt()) {
    // ... (compute the encoding of the preloaded SGPR range and emit the
    //      corresponding wait via .addImm(RegEncoding); elided in this
    //      capture) ...
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(ST, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11).
    // If the shader is actually wave32 we have to modify the
    // const_index_stride field of the descriptor's third sub-reg (bits 22:21)
    // to 0b10 (stride = 32 = 8 * 4 records per wave32).
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(Fn.getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
                  .addReg(ScratchRsrcSub1)
                  .addImm(0)
                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::ScalablePredicateVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  }

  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}
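Callers use the returned copy to run whole-wave code and then put the original mask back. The canonical shape, as a sketch (wave64 opcodes shown; the real callers below select wave32/wave64 variants):

Register Saved = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                      /*IsProlog=*/true,
                                      /*EnableInactiveLanes=*/true);
// ... whole-wave-mode spills execute here with the widened EXEC mask ...
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
    .addReg(Saved, RegState::Kill); // restore the original EXEC mask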

void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers, but also save all lanes of the callee-saved WWM registers.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/true, /*EnableInactiveLanes=*/true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog=*/true,
                                             /*EnableInactiveLanes=*/false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // All lanes need to remain active for the rest of a whole wave function's
    // prolog.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog=*/true,
                           /*EnableInactiveLanes=*/true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for the FP spill:
    // Skip if FP is saved to a scratch SGPR, since that save has already been
    // emitted. Otherwise, FP has been moved to a temporary register and we
    // spill that instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for the FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise,
    // restore the FP value into a temporary register. The frame pointer should
    // be overwritten only at the end, when all other spills are restored from
    // the current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers, but restore all lanes of the callee-saved registers.
  // Due to this, we might end up restoring some lanes twice, but that
  // shouldn't matter.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole-wave functions, EXEC is already -1 here, so the callee-saved
    // VGPRs can be restored right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    MachineInstr &Return = MBB.instr_back();
    unsigned Opcode = Return.getOpcode();
    switch (Opcode) {
    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
      Opcode = AMDGPU::SI_RETURN;
      break;
    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
      Opcode = AMDGPU::SI_TCRETURN_GFX;
      break;
    default:
      llvm_unreachable("Unexpected return inst");
    }
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      // Restore the inactive lanes of the scratch registers under the
      // inverted original mask.
      unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
      MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
      BuildMI(MBB, MBBI, DL, TII->get(XorOpc), Exec)
          .addReg(OrigExec)
          .addImm(-1);
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore the original EXEC.
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addReg(OrigExec);

    // Drop the first operand and update the opcode.
    Return.removeOperand(0);
    Return.setDesc(TII->get(Opcode));

    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog=*/false,
                                             /*EnableInactiveLanes=*/false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // The DebugLoc must be unknown since the first instruction with a DebugLoc
  // is used to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions called via llvm.amdgcn.cs.chain set up the SP as usual, but
    // are free to set up the other registers differently.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the rest of the spills
      // afterwards; this avoids the extra FP copy needed when FP is spilled
      // to memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
                   .addReg(FramePtrReg, RegState::Kill)
                   .addImm(-Alignment * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
                   .addReg(StackPtrReg)
                   .addImm(RoundedSize * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs, we may have saved FP even though it was
  // not strictly needed.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");

  if (FuncInfo->isWholeWaveFunction()) {
    // The whole-wave setup pseudo has served its purpose by now.
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  }
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;

  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  if (RoundedSize != 0) {
    if (TRI.hasBasePointer(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(TRI.getBaseRegister())
          .setMIFlag(MachineInstr::FrameDestroy);
    } else if (hasFP(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(FramePtrReg)
          .setMIFlag(MachineInstr::FrameDestroy);
    }
  }

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as the base register. If
    // SGPRForFPSaveRestoreCopy is not set, restore the previous value of FP
    // into a new scratch register, and copy it to FP later, once the other
    // registers are restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR-to-AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead update them with the
        // correct register value. But not sure the register value alone is
        // adequate to lower the DIExpression; it should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // could. Any remaining SGPR spills will go to memory, so move them back to
  // the default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source allocas.
  // Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot.
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
    }
  }
}

void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for AGPR
    // copies. Now that register allocation is done, check whether there is an
    // unused VGPR that is lower than the one reserved before RA. If one
    // exists, use it for the AGPR copy instead.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR for the AGPR copy. Reserved
      // registers should already be frozen at this point, so we can avoid
      // calling MRI.freezeReservedRegs and just use MRI.reserveReg.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }

  // We initially reserved the highest available SGPR pair for long branches.
  // Now, after RA, shift down to a lower unused pair if one exists.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null, we didn't find a long branch and never
  // reserved a register to begin with, so there is nothing to shift down.
  // If UnusedLowSGPR is null, there is no available lower register, so just
  // keep the one we originally set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}

// The special SGPR spills, like the ones needed for FP, BP or any reserved
// registers, are delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
  if (NeedExecCopyReservedReg ||
      (ReservedRegForExecCopy &&
       MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
    MRI.reserveReg(ReservedRegForExecCopy, TRI);
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If we found an unused scratch SGPR, reserve the register itself for
      // the EXEC copy; no spill is needed in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs a spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  } else if (ReservedRegForExecCopy) {
    // Reset it at this point. There are no whole-wave copies and spills
    // encountered.
    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to force the eager creation of the CSR spills as well.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // TODO: Walking through all MBBs here would be a bad heuristic. Better
      // handle them elsewhere.
      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all returns to use the same registers.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(),
                         [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  SmallVector<Register> SortedWWMVGPRs;
  for (Register Reg : MFI->getWWMReservedRegs()) {
    // The shift-back is needed only for the VGPRs used for SGPR spills and
    // they are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into
    // the WWM reserved registers.
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    if (TRI->getRegSizeInBits(*RC) != 32)
      continue;
    SortedWWMVGPRs.push_back(Reg);
  }

  sort(SortedWWMVGPRs, std::greater<Register>());
  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);

  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  if (MFI->isWholeWaveFunction()) {
    // In practice, all the VGPRs are WWM registers, and we will need to save
    // at least their inactive lanes. Add them to WWMReservedRegs.
    assert(!NeedExecCopyReservedReg &&
           "Whole wave functions can use the reg mapped for their i1 argument");

    // FIXME: Be more efficient!
    unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
    for (MCRegister Reg :
         AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
      if (MF.getRegInfo().isPhysRegModified(Reg)) {
        MFI->reserveWWMRegister(Reg);
        MF.begin()->addLiveIn(Reg);
      }
    MF.begin()->sortUniqueLiveIns();
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there was no AGPR loads and stores, and spilling also required
  // a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // Registers used in the return instruction do not need to be saved; this
  // prevents the CSR restore code from clobbering return values.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Create the stack objects for WWM registers now.
  for (Register Reg : MFI->getWWMReservedRegs()) {
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                          TRI->getSpillAlign(*RC));
  }

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spill of caller
  // save VGPR reserved for SGPR spills as we now always create stack entry
  // for it, if we don't have any stack objects already, since we require a FP
  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
  // there are any SGPR spills, whether they are CSR spills or otherwise.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with return instruction is hidden through the SI_RETURN
  // pseudo. Given that and since the IPRA computes actual register usage and
  // does not use CSR list, the clobbering of return address by function calls
  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's
  // register usage collection. This ensures the save/restore of the return
  // address happens in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}

static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
                                       const GCNSubtarget &ST,
                                       std::vector<CalleeSavedInfo> &CSI) {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);

  assert(is_sorted(CSI, [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
           return A.getReg() < B.getReg();
         }) &&
         "Callee saved registers not sorted");

  auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
    return !CSI.isSpilledToReg() &&
           TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
           !FuncInfo->isWWMReservedRegister(CSI.getReg());
  };

  auto CSEnd = CSI.end();
  for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
    Register Reg = CSIt->getReg();
    if (!CanUseBlockOps(*CSIt))
      continue;

    // Find all the regs that will fit in a 32-bit mask starting at the current
    // reg and build said mask. It should have 1 for every register that's
    // included, with the current register as the least significant bit.
    uint32_t Mask = 1;
    CSEnd = std::remove_if(
        CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
          if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
            Mask |= 1 << (CSI.getReg() - Reg);
            return true;
          } else {
            return false;
          }
        });

    Register RegBlock =
        TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
    if (!RegBlock) {
      // We couldn't find a super register for the block. This can happen if
      // the register we started with is too high (e.g. v232 if the maximum is
      // v255). We therefore try to get the last register block and figure out
      // the mask from there.
      Register LastBlockStart =
          AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
      RegBlock =
          TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
      assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
             "Couldn't find super register");
      int RegDelta = Reg - LastBlockStart;
      assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
             "Bad shift amount");
      Mask <<= RegDelta;
    }

    FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);

    // The stack objects can be a bit smaller than the register block if we
    // know some of the high bits of Mask are 0. This may happen often with
    // calling conventions where the caller-saved and callee-saved VGPRs are
    // interleaved at a small boundary (e.g. 8 or 16).
    int UnusedBits = llvm::countl_zero(Mask);
    unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
    int FrameIdx =
        MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
                              /*isSpillSlot=*/true);
    MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);

    CSIt->setFrameIdx(FrameIdx);
    CSIt->setReg(RegBlock);
  }
  CSI.erase(CSEnd, CSI.end());
}
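The mask packs up to 32 consecutive VGPRs into one block: bit i set means VGPR Reg+i is actually saved, and leading zero bits shrink the stack object. A worked example (sketch, hypothetical registers):

// VGPR40, VGPR41 and VGPR44 all land in the block that starts at VGPR40.
uint32_t Mask = 0;
Mask |= 1u << (40 - 40); // VGPR40 -> bit 0
Mask |= 1u << (41 - 40); // VGPR41 -> bit 1
Mask |= 1u << (44 - 40); // VGPR44 -> bit 4
// Mask == 0b10011; countl_zero(Mask) == 27, so the stack object shrinks by
// 27 * 4 bytes and only the 5 low dwords of the block are kept.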

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();

  if (UseVGPRBlocks)
    assignSlotsUsingVGPRBlocks(MF, ST, CSI);

  return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
}

bool SIFrameLowering::assignCalleeSavedSpillSlotsImpl(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  Register SGPRForBPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
    return false;

  unsigned NumModifiedRegs = 0;

  if (SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg.asMCReg() &&
               SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign
  // these first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close to possible. This could save a lot of
  // space on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                               SIInstrFlags::FlatScratch))
      return false;
  } else {
    if (TII->isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}

bool SIFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);

  const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
  for (const CalleeSavedInfo &CS : CSI) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block store.
    uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
    int FrameIndex = CS.getFrameIdx();
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(*MF, FrameIndex);
    MachineMemOperand *MMO =
        MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 FrameInfo.getObjectSize(FrameIndex),
                                 FrameInfo.getObjectAlign(FrameIndex));

    BuildMI(MBB, MI, MI->getDebugLoc(),
            TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
        .addReg(Reg, getKillRegState(false))
        .addFrameIndex(FrameIndex)
        .addReg(FuncInfo->getStackPtrOffsetReg())
        .addImm(0)
        .addImm(Mask)
        .addMemOperand(MMO);

    FuncInfo->setHasSpilledVGPRs();

    // Add the register to the liveins. This is necessary because if any of the
    // VGPRs in the register block is reserved (e.g. if it's a WWM register),
    // then the whole block will be marked as reserved and `updateLiveness`
    // will skip it.
    MBB.addLiveIn(Reg);
  }
  MBB.sortUniqueLiveIns();

  return true;
}

bool SIFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;

  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
  const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
  for (const CalleeSavedInfo &CS : reverse(CSI)) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block load.
    uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
    int FrameIndex = CS.getFrameIdx();
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(*MF, FrameIndex);
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
        MFI.getObjectAlign(FrameIndex));

    auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
                       TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
                   .addFrameIndex(FrameIndex)
                   .addReg(FuncInfo->getStackPtrOffsetReg())
                   .addImm(0)
                   .addImm(Mask)
                   .addMemOperand(MMO);
    SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);

    // Add the register to the liveins. This is necessary because if any of the
    // VGPRs in the register block is reserved (e.g. if it's a WWM register),
    // then the whole block will be marked as reserved and `updateLiveness`
    // will skip it.
    MBB.addLiveIn(Reg);
  }

  MBB.sortUniqueLiveIns();
  return true;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
                   .addReg(SPReg)
                   .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}
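The byte amount carried by the call-frame pseudo is stack-aligned and then scaled the same way as every other SP adjustment in this file (see getScratchScaleFactor above). A sketch of the arithmetic, with illustrative values:

int64_t Amount = 64;                        // bytes from the call frame pseudo
Amount = alignTo(Amount, getStackAlign());  // round up to the stack alignment
Amount *= getScratchScaleFactor(ST);        // 64 stays 64 (flat) or becomes 4096
if (IsDestroy)
  Amount = -Amount;                         // destroy pops what setup pushed
// The result feeds the emitted update: s_add_i32 SP, SP, Amount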

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known to be 0, so we never really need to set
// up an explicit register for it. However, we have to provide a register to
// avoid breaking APIs elsewhere.
bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry & chain functions we can use an immediate offset in most cases,
  // so the presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
    // All offsets are unsigned, so they need to be addressed in the same
    // direction as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         mayReserveScratchForCWSR(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

bool SIFrameLowering::mayReserveScratchForCWSR(
    const MachineFunction &MF) const {
  return MF.getSubtarget<GCNSubtarget>().isDynamicVGPREnabled() &&
         AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) &&
         AMDGPU::isCompute(MF.getFunction().getCallingConv());
}

// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known 0 on entry to kernels, we never really need an
// FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlaps many of the cases where an
// ordinary function would require an FP.
// Also used for chain functions, although chain functions may need special
// care if dynamic allocation is ever implemented.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
          MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
         "only expected to call this for entry points and chain functions");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}
