LLVM: lib/Target/AMDGPU/SIFrameLowering.cpp Source File

//===----------------------- SIFrameLowering.cpp ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));
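As with any cl::opt, the flag above can be toggled from the llc command line
when investigating spill placement; the invocation below is an illustrative
assumption, not something taken from this file:

  // Force VGPR spills to go to scratch memory instead of AGPRs:
  //   llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=0 in.ll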

// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns a null register.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

// Find a scratch register that we can use in the prologue/epilogue. We avoid
// using callee saved registers since they may appear to be free when this is
// called from canUseAsPrologue (during shrink wrapping), but then no longer be
// free when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}
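A short usage sketch (assumed, but it mirrors the spill helpers later in this
file): callers probe for a temporary VGPR and treat failure as fatal, since
prolog/epilog code cannot spill without one.

  // e.g. inside a prolog/epilog spill helper:
  MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
      MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
  if (!TmpVGPR)
    report_fatal_error("failed to find free scratch register");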

/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There's no free scratch SGPR, so spill the register into a free
      // VGPR lane reserved for prolog/epilog spills.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove the dead frame index created above.
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEMORY, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}

// Emit one dword of a (possibly split) prolog spill through either flat
// scratch or a MUBUF store, depending on the subtarget.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

// Counterpart of buildPrologSpill: reload one dword in the epilogue.
static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}

176

183 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

184 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);

185 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

186

191 } else {

192 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);

194 }

200}

201

206 if (LiveUnits.empty()) {

208 if (IsProlog) {

210 } else {

211

214 }

215 }

216}

namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the ones needed for FP,
// BP, etc. These spills are delayed until the actual frame layout is finalized
// and the registers to be spilled are determined.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEMORY:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEMORY:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm
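A minimal usage sketch (assumed; it mirrors how emitCSRSpillStores and
emitCSRSpillRestores below drive this class): construct the builder for each
recorded (register, save-info) pair and let save()/restore() dispatch on the
SGPRSaveKind.

  // Prologue side; the epilogue calls SB.restore() instead.
  PrologEpilogSGPRSpillBuilder SB(Reg, SI, MBB, MBBI, DL, TII, TRI,
                                  LiveUnits, FrameReg);
  SB.save();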

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`.
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT.
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find an unused register pair to load the flat scratch init into.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor.
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling and not also
// memory. They should all be dead at this point.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks.

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object. This will return `Register()` in
  // cases where there are no actual uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with the
  // scratch wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11). If the shader is
    // actually wave32 we have to modify the const_index_stride field of the
    // descriptor's third sub-reg (bits 22:21) to 0b10 (stride=32 = 8 * 4
    // dwords).
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}
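For orientation, a rough sketch of the machine code this produces in a wave64
prologue (register numbers are placeholders, not taken from the source):

  // s_or_saveexec_b64 s[4:5], -1   ; save exec, then enable all lanes
  //   ...WWM VGPR spills run with every lane active...
  // s_mov_b64 exec, s[4:5]         ; the caller restores exec afterwards

With EnableInactiveLanes set, the XOR form is emitted instead, so exec flips
to exactly the previously inactive lanes while the old mask is still returned
to the caller.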

void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for FP: skip if FP is saved to a scratch SGPR (the
    // save has already been emitted); otherwise FP has been moved to a
    // temporary register, so spill the copy instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for FP: skip if FP needs to be restored from the
    // scratch SGPR; otherwise restore the previous FP value into a temporary
    // register. The frame pointer should be overwritten only at the end, when
    // all other spills are restored from the current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due
  // to this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive an SP,
    // but are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the spill entry. It
      // avoids the extra FP copy needed in the other two cases when FP is
      // spilled to memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not set, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers
    // are restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (RoundedSize != 0 && hasFP(MF)) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
      && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // adequate to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs, so we
  // can eliminate them now.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source allocas.
  // Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot.
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
    }
  }
}

void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for AGPR
    // copy. Now since we are done with RA, check if there exists an unused
    // VGPR which is lower than the earlier reserved VGPR before RA. If one
    // exists, use it for the AGPR copy instead of the one reserved before RA.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR (for AGPR copy) and release the
      // reserved VGPR identified before RA, which we do not use anymore.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }

  // We initially reserved the highest available SGPR pair for long branches;
  // now, after RA, we shift down to a lower unused one if one exists.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null then we didn't find a long branch and
  // never reserved a register to begin with, so there is nothing to shift
  // down. Then if UnusedLowSGPR is null, there isn't an available lower
  // register to use, so just keep the original one we set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}

// The special SGPR spills like the ones needed for FP, BP or any reserved
// registers delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
  if (NeedExecCopyReservedReg ||
      (ReservedRegForExecCopy &&
       MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
    MRI.reserveReg(ReservedRegForExecCopy, TRI);
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If found any unused scratch SGPR, reserve the register itself for Exec
      // copy and there is no need for any spills in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  } else if (ReservedRegForExecCopy) {
    // Reset it at this point. There are no whole-wave copies and spills
    // encountered.
    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // TODO: Walking through all MBBs here would be a bad heuristic. A
      // better way should be by calling allocateWWMSpill during the regalloc
      // pipeline whenever a physical register is allocated for the intended
      // virtual registers.
      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all return to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(),
                         [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  SmallVector<Register> SortedWWMVGPRs;
  for (Register Reg : MFI->getWWMReservedRegs()) {
    // The shift-back is needed only for the VGPRs used for SGPR spills and
    // they are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into
    // WWM reserved registers.
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    if (TRI->getRegSizeInBits(*RC) > 32)
      continue;
    SortedWWMVGPRs.push_back(Reg);
  }

  sort(SortedWWMVGPRs, std::greater());
  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);

  if (MFI->isEntryFunction())
    return;

  // Remove any VGPRs used in the return value because these do not need to be
  // saved.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Create the stack objects for WWM registers now.
  for (Register Reg : MFI->getWWMReservedRegs()) {
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                          TRI->getSpillAlign(*RC));
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there are no AGPR loads and stores, so spilling them also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spill of caller
  // save VGPR reserved for SGPR spills as we now always create stack entry
  // for it, if we don't have any stack objects already, since we require an FP
  // if there is a call and stack.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with return instruction is hidden through the SI_RETURN
  // pseudo. Given that and since the IPRA computes actual register usage and
  // does not use CSR list, the clobbering of return address by function calls
  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
  // usage collection. This will ensure save/restore of return address happens
  // in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  Register SGPRForBPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
    return false;

  unsigned NumModifiedRegs = 0;

  if (SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign
  // these first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close to possible. This could save a lot of
  // space on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                               SIInstrFlags::FlatScratch))
      return false;
  } else {
    if (TII->isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("cannot handle call frames with callee pop");
  }

  return MBB.erase(I);
}
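A worked example (not from the source) under the assumption of a wave64 MUBUF
target, where getScratchScaleFactor(ST) is 64: a 16-byte call frame setup is
aligned, scaled to 16 * 64 = 1024 swizzled bytes, and folded into one SP
update; the matching destroy pseudo emits the negated add.

  // ADJCALLSTACKUP 16 ...  lowers to roughly:
  //   s_add_i32 s32, s32, 1024   ; SCC def marked dead
  // and the corresponding ADJCALLSTACKDOWN to:
  //   s_add_i32 s32, s32, -1024

With flat scratch enabled the factor is 1, so offsets stay in per-lane bytes.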

// Returns true if the frame will require a reference to the stack pointer.
//
// This is the set of conditions common to setting up the stack pointer in a
// kernel, and for using a frame pointer in a callable function.
//
// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known 0, so we never really need to setup an
// explicit register for it. However, we have to provide a register for the
// generic frame lowering code.
bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry & chain functions we can use an immediate offset in most cases,
  // so the presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known 0 on entry to kernels, we never really need an
// FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlaps many of the cases where an
// ordinary function would require an FP. Also used for chain functions: while
// not technically entry functions, chain functions may need to set up a stack
// pointer in some situations.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
          MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
         "only expected to call this for entry points and chain functions");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}
