LLVM: lib/Target/AMDGPU/SIRegisterInfo.cpp Source File


27using namespace llvm;

28

29#define GET_REGINFO_TARGET_DESC

30#include "AMDGPUGenRegisterInfo.inc"

31

33 "amdgpu-spill-sgpr-to-vgpr",

34 cl::desc("Enable spilling SGPRs to VGPRs"),

37

38std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;

39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;


46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};

47

49 const Twine &ErrMsg) {

52}

53

54namespace llvm {


122 MI->getOperand(0).isKill(), Index, RS) {}

123

134

136 ExecReg = AMDGPU::EXEC_LO;

137 MovOpc = AMDGPU::S_MOV_B32;

138 NotOpc = AMDGPU::S_NOT_B32;

139 } else {

141 MovOpc = AMDGPU::S_MOV_B64;

142 NotOpc = AMDGPU::S_NOT_B64;

143 }

144

145 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

147 SuperReg != AMDGPU::EXEC && "exec should never spill");

148 }


178 assert(RS && "Cannot spill SGPR to memory without RegScavenger");

179 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,

180 0, false);


188 } else {

189

192 }


198 }


205 assert(SavedExecReg && "Exec is already saved, refuse to save again");

207 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;

209 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);


217 auto I =

221

222 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false);

223 } else {


227 if (RS->isRegUsed(AMDGPU::SCC))

229 "unhandled SGPR spill to memory");

230

231

233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false,

234 false);

235

239 I->getOperand(2).setIsDead();

240 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false);

241 }

242 }


257 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true,

258 false);


266 }

267 } else {

268

269 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true,

270 false);

274 I->getOperand(2).setIsDead();

275

276

278 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true);

279 }

280

281

285 }

286 }


298 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);

299 } else {


303 if (RS->isRegUsed(AMDGPU::SCC))

305 "unhandled SGPR spill to memory");

306

307

308 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,

309 false);

310

313 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);

316 }

317 }

318

321 MI = NewMI;

322 MBB = NewMBB;

323 }

324};

325

326}

327

330 ST.getAMDGPUDwarfFlavour(),

331 0,

334

335 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&

336 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&

337 (getSubRegIndexLaneMask(AMDGPU::lo16) |

338 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==

339 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&

340 "getNumCoveredRegs() will not work with generated subreg masks!");

341

342 RegPressureIgnoredUnits.resize(getNumRegUnits());

343 RegPressureIgnoredUnits.set(

344 static_cast<unsigned>(*regunits(MCRegister::from(AMDGPU::M0)).begin()));

345 for (auto Reg : AMDGPU::VGPR_16RegClass) {

347 RegPressureIgnoredUnits.set(

348 static_cast<unsigned>(*regunits(Reg).begin()));

349 }


354 static auto InitializeRegSplitPartsOnce = [this]() {

355 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {

356 unsigned Size = getSubRegIdxSize(Idx);

357 if (Size & 15)

358 continue;

359 std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];

360 unsigned Pos = getSubRegIdxOffset(Idx);

361 if (Pos % Size)

362 continue;

364 if (Vec.empty()) {

365 unsigned MaxNumParts = 1024 / Size;

366 Vec.resize(MaxNumParts);

367 }

368 Vec[Pos] = Idx;

369 }

370 };

371

372 static llvm::once_flag InitializeSubRegFromChannelTableFlag;

373

374 static auto InitializeSubRegFromChannelTableOnce = [this]() {

375 for (auto &Row : SubRegFromChannelTable)

376 Row.fill(AMDGPU::NoSubRegister);

377 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {

378 unsigned Width = getSubRegIdxSize(Idx) / 32;

379 unsigned Offset = getSubRegIdxOffset(Idx) / 32;

382 if (Width == 0)

383 continue;

384 unsigned TableIdx = Width - 1;

385 assert(TableIdx < SubRegFromChannelTable.size());

387 SubRegFromChannelTable[TableIdx][Offset] = Idx;

388 }

389 };

390

391 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);

393 InitializeSubRegFromChannelTableOnce);

394}

395

400}

401

402

406 switch (CC) {

410 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList

411 : CSR_AMDGPU_SaveList;

414 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList

415 : CSR_AMDGPU_SI_Gfx_SaveList;

417 return CSR_AMDGPU_CS_ChainPreserve_SaveList;

418 default: {

419

420 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;

421 return &NoCalleeSavedReg;

422 }

423 }

424}

425

430

433 switch (CC) {

437 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask

438 : CSR_AMDGPU_RegMask;

441 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask

442 : CSR_AMDGPU_SI_Gfx_RegMask;

445

446

447 return AMDGPU_AllVGPRs_RegMask;

448 default:

449 return nullptr;

450 }

451}

452

454 return CSR_AMDGPU_NoRegs_RegMask;

455}

456

458 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;

459}


469 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)

470 return &AMDGPU::AV_32RegClass;

471 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)

472 return &AMDGPU::AV_64RegClass;

473 if (RC == &AMDGPU::VReg_64_Align2RegClass ||

474 RC == &AMDGPU::AReg_64_Align2RegClass)

475 return &AMDGPU::AV_64_Align2RegClass;

476 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)

477 return &AMDGPU::AV_96RegClass;

478 if (RC == &AMDGPU::VReg_96_Align2RegClass ||

479 RC == &AMDGPU::AReg_96_Align2RegClass)

480 return &AMDGPU::AV_96_Align2RegClass;

481 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)

482 return &AMDGPU::AV_128RegClass;

483 if (RC == &AMDGPU::VReg_128_Align2RegClass ||

484 RC == &AMDGPU::AReg_128_Align2RegClass)

485 return &AMDGPU::AV_128_Align2RegClass;

486 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)

487 return &AMDGPU::AV_160RegClass;

488 if (RC == &AMDGPU::VReg_160_Align2RegClass ||

489 RC == &AMDGPU::AReg_160_Align2RegClass)

490 return &AMDGPU::AV_160_Align2RegClass;

491 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)

492 return &AMDGPU::AV_192RegClass;

493 if (RC == &AMDGPU::VReg_192_Align2RegClass ||

494 RC == &AMDGPU::AReg_192_Align2RegClass)

495 return &AMDGPU::AV_192_Align2RegClass;

496 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)

497 return &AMDGPU::AV_256RegClass;

498 if (RC == &AMDGPU::VReg_256_Align2RegClass ||

499 RC == &AMDGPU::AReg_256_Align2RegClass)

500 return &AMDGPU::AV_256_Align2RegClass;

501 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)

502 return &AMDGPU::AV_512RegClass;

503 if (RC == &AMDGPU::VReg_512_Align2RegClass ||

504 RC == &AMDGPU::AReg_512_Align2RegClass)

505 return &AMDGPU::AV_512_Align2RegClass;

506 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)

507 return &AMDGPU::AV_1024RegClass;

508 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||

509 RC == &AMDGPU::AReg_1024_Align2RegClass)

510 return &AMDGPU::AV_1024_Align2RegClass;

511 }

512

514}
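// Added summary (not part of the upstream file): the chain of checks above
// pairs each VGPR-only or AGPR-only class with the combined AV class of the
// same width, and each _Align2 variant with the matching AV_*_Align2 class,
// e.g. VReg_64 or AReg_64 both map to AV_64, so either register bank can
// satisfy the constraint.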


526 }

529}

530

536

538

540 return AMDGPU_AllVGPRs_RegMask;

541}

542

544 return AMDGPU_AllAGPRs_RegMask;

545}

546

548 return AMDGPU_AllVectorRegs_RegMask;

549}

550

552 return AMDGPU_AllAllocatableSRegs_RegMask;

553}

554

556 unsigned NumRegs) {

559 assert(NumRegIndex && "Not implemented");

560 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());

561 return SubRegFromChannelTable[NumRegIndex - 1][Channel];

562}
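// Worked example (added note, assuming the usual TableGen-generated index
// names): getSubRegFromChannel(2) returns AMDGPU::sub2, a single 32-bit
// channel starting at lane 2, while getSubRegFromChannel(0, 2) returns
// AMDGPU::sub0_sub1, two 32-bit channels starting at lane 0. The width is
// first mapped through SubRegFromChannelTableWidthMap and the (width,
// channel) pair then indexes the table built in the constructor.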

563

566 const unsigned Align,

569 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));

570 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);

571}


588 reserveRegisterTuples(Reserved, AMDGPU::EXEC);

589 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

590

591

592 reserveRegisterTuples(Reserved, AMDGPU::M0);

593

594

595 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);

596 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);

597 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

598

599

600 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);

601 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);

602 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);

603 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

604 reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);

605 reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);

606

607

608 reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);

609 reserveRegisterTuples(Reserved, AMDGPU::TENSORcnt);

610

611

612 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

613

614

615 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

616

617

618 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

619

620

621 reserveRegisterTuples(Reserved, AMDGPU::TBA);

622 reserveRegisterTuples(Reserved, AMDGPU::TMA);

623 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);

624 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);

625 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);

626 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);

627 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);

628 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);

629 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);

630 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

631

632

633 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);


637 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);

638 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();

640 if (RC->isBaseClass() && isSGPRClass(RC)) {

641 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

644 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)

646 }

647 }

648 }

649

651 if (ScratchRSrcReg != AMDGPU::NoRegister) {


655 reserveRegisterTuples(Reserved, ScratchRSrcReg);

656 }

657

659 if (LongBranchReservedReg)

660 reserveRegisterTuples(Reserved, LongBranchReservedReg);


666 if (StackPtrReg) {

667 reserveRegisterTuples(Reserved, StackPtrReg);

668 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

669 }

670

672 if (FrameReg) {

673 reserveRegisterTuples(Reserved, FrameReg);

674 assert(!isSubRegister(ScratchRSrcReg, FrameReg));

675 }

676

679 reserveRegisterTuples(Reserved, BasePtrReg);

680 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

681 }


686 if (ExecCopyReg)

687 reserveRegisterTuples(Reserved, ExecCopyReg);


691 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());

692

694 if (RC->isBaseClass() && isVGPRClass(RC)) {

695 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

698 if (Index + NumRegs > MaxNumVGPRs)

700 }

701 }

702 }

703

704

705 if (!ST.hasMAIInsts())

706 MaxNumAGPRs = 0;

708 if (RC->isBaseClass() && isAGPRClass(RC)) {

709 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

712 if (Index + NumRegs > MaxNumAGPRs)

714 }

715 }

716 }


720 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {

722 }


728 if (!NonWWMRegMask.empty()) {

729 for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;

730 RegI < RegE; ++RegI) {

731 if (NonWWMRegMask.test(RegI))

732 reserveRegisterTuples(Reserved, RegI);

733 }

734 }

735

737 reserveRegisterTuples(Reserved, Reg);

738

739

741 reserveRegisterTuples(Reserved, Reg);

742

744 reserveRegisterTuples(Reserved, Reg);

745

747}
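// Added note on the reservation loops above: any SGPR, VGPR or AGPR tuple
// whose starting index plus its width in 32-bit registers exceeds the
// per-function budget (MaxNumSGPRs, MaxNumVGPRs, MaxNumAGPRs) is reserved,
// so the allocator never uses registers beyond what the subtarget and
// occupancy constraints allow.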


761 if (Info->isBottomOfStack())

762 return false;

763

765}

766

769 if (Info->isEntryFunction()) {

772 }

773

774

775 return true;

776}


784 return false;

785}


796 return true;

797}

798

801

802 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),

803 AMDGPU::OpName::offset);

804 return MI->getOperand(OffIdx).getImm();

805}

806

808 int Idx) const {

809 switch (MI->getOpcode()) {

810 case AMDGPU::V_ADD_U32_e32:

811 case AMDGPU::V_ADD_U32_e64:

812 case AMDGPU::V_ADD_CO_U32_e32: {

813 int OtherIdx = Idx == 1 ? 2 : 1;

815 return OtherOp.isImm() ? OtherOp.getImm() : 0;

816 }

817 case AMDGPU::V_ADD_CO_U32_e64: {

818 int OtherIdx = Idx == 2 ? 3 : 2;

820 return OtherOp.isImm() ? OtherOp.getImm() : 0;

821 }

822 default:

823 break;

824 }

825

827 return 0;

828

829 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),

830 AMDGPU::OpName::vaddr) ||

831 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),

832 AMDGPU::OpName::saddr))) &&

833 "Should never see frame index on non-address operand");

834

836}

837

840 assert(MI.getDesc().isAdd());

843

844 if (Src0.isFI()) {

845 return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),

847 }

848

849 if (Src1.isFI()) {

850 return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),

852 }

853

854 return false;

855}

856

858

859 switch (MI->getOpcode()) {

860 case AMDGPU::V_ADD_U32_e32: {

861

862

863 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&

865 return false;

866 [[fallthrough]];

867 }

868 case AMDGPU::V_ADD_U32_e64:


877 return !ST.enableFlatScratch();

878 case AMDGPU::V_ADD_CO_U32_e32:

879 if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&

881 return false;

882

883

884 return MI->getOperand(3).isDead();

885 case AMDGPU::V_ADD_CO_U32_e64:

886

887 return MI->getOperand(1).isDead();

888 default:

889 break;

890 }

891

893 return false;

894

896

899 return TII->isLegalMUBUFImmOffset(FullOffset);

900

903}

904

906 int FrameIdx,

907 int64_t Offset) const {

909 DebugLoc DL;

910

911 if (Ins != MBB->end())

912 DL = Ins->getDebugLoc();

913

917 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32

918 : AMDGPU::V_MOV_B32_e32;

919

920 Register BaseReg = MRI.createVirtualRegister(

921 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass

922 : &AMDGPU::VGPR_32RegClass);

923

927 return BaseReg;

928 }

929

930 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

931

932 Register FIReg = MRI.createVirtualRegister(

933 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass

934 : &AMDGPU::VGPR_32RegClass);

935

936 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)

940

941 if (ST.enableFlatScratch()) {

942

943 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)

947 return BaseReg;

948 }

949

950 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)

952 .addReg(FIReg)

953 .addImm(0);

954

955 return BaseReg;

956}
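// Sketch of the two paths above, as far as the surviving lines show: with
// flat scratch the base register is an SGPR and the frame offset is folded
// in with S_ADD_I32; with MUBUF scratch the frame index is materialized into
// a VGPR with V_MOV_B32 and the offset is added with an add-no-carry VALU
// instruction via getAddNoCarry().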

957

959 int64_t Offset) const {

961

962 switch (MI.getOpcode()) {

963 case AMDGPU::V_ADD_U32_e32:

964 case AMDGPU::V_ADD_CO_U32_e32: {

967 if (!FIOp->isFI())

969

970 if (!ImmOp->isImm()) {

973 TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);

974 return;

975 }

976

977 int64_t TotalOffset = ImmOp->getImm() + Offset;

978 if (TotalOffset == 0) {

979 MI.setDesc(TII->get(AMDGPU::COPY));

980 for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)

981 MI.removeOperand(I);

982

983 MI.getOperand(1).ChangeToRegister(BaseReg, false);

984 return;

985 }

986

987 ImmOp->setImm(TotalOffset);


999 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

1000 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), BaseRegVGPR)

1002 MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);

1003 } else {

1004 MI.getOperand(2).ChangeToRegister(BaseReg, false);

1005 }

1006 return;

1007 }

1008 case AMDGPU::V_ADD_U32_e64:

1009 case AMDGPU::V_ADD_CO_U32_e64: {

1010 int Src0Idx = MI.getNumExplicitDefs();

1013 if (!FIOp->isFI())

1015

1016 if (!ImmOp->isImm()) {

1018 TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);

1019 return;

1020 }

1021

1022 int64_t TotalOffset = ImmOp->getImm() + Offset;

1023 if (TotalOffset == 0) {

1024 MI.setDesc(TII->get(AMDGPU::COPY));

1025

1026 for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)

1027 MI.removeOperand(I);

1028

1029 MI.getOperand(1).ChangeToRegister(BaseReg, false);

1030 } else {

1032 ImmOp->setImm(TotalOffset);

1033 }

1034

1035 return;

1036 }

1037 default:

1038 break;

1039 }

1040

1041 bool IsFlat = TII->isFLATScratch(MI);

1042

1043#ifndef NDEBUG

1044

1045 bool SeenFI = false;

1047 if (MO.isFI()) {

1048 if (SeenFI)

1050

1051 SeenFI = true;

1052 }

1053 }

1054#endif

1055

1057 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr

1058 : AMDGPU::OpName::vaddr);

1059

1062

1063 assert(FIOp && FIOp->isFI() && "frame index must be address operand");

1065

1066 if (IsFlat) {

1069 "offset should be legal");

1071 OffsetOp->setImm(NewOffset);

1072 return;

1073 }

1074

1075#ifndef NDEBUG

1076 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);

1078#endif

1079

1080 assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");

1081

1083 OffsetOp->setImm(NewOffset);

1084}

1085

1088 int64_t Offset) const {

1089

1090 switch (MI->getOpcode()) {

1091 case AMDGPU::V_ADD_U32_e32:

1092 case AMDGPU::V_ADD_CO_U32_e32:

1093 return true;

1094 case AMDGPU::V_ADD_U32_e64:

1095 case AMDGPU::V_ADD_CO_U32_e64:

1097 default:

1098 break;

1099 }

1100

1102 return false;

1103

1105

1108 return TII->isLegalMUBUFImmOffset(NewOffset);

1109

1112}


1119 return &AMDGPU::VGPR_32RegClass;

1120}

1121

1124 return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;

1125}

1126

1129

1130 unsigned Op = MI.getOpcode();

1131 switch (Op) {

1132 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:

1133 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:


1138 (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm());

1139 case AMDGPU::SI_SPILL_S1024_SAVE:

1140 case AMDGPU::SI_SPILL_S1024_RESTORE:

1141 case AMDGPU::SI_SPILL_V1024_SAVE:

1142 case AMDGPU::SI_SPILL_V1024_RESTORE:

1143 case AMDGPU::SI_SPILL_A1024_SAVE:

1144 case AMDGPU::SI_SPILL_A1024_RESTORE:

1145 case AMDGPU::SI_SPILL_AV1024_SAVE:

1146 case AMDGPU::SI_SPILL_AV1024_RESTORE:

1147 return 32;

1148 case AMDGPU::SI_SPILL_S512_SAVE:

1149 case AMDGPU::SI_SPILL_S512_RESTORE:

1150 case AMDGPU::SI_SPILL_V512_SAVE:

1151 case AMDGPU::SI_SPILL_V512_RESTORE:

1152 case AMDGPU::SI_SPILL_A512_SAVE:

1153 case AMDGPU::SI_SPILL_A512_RESTORE:

1154 case AMDGPU::SI_SPILL_AV512_SAVE:

1155 case AMDGPU::SI_SPILL_AV512_RESTORE:

1156 return 16;

1157 case AMDGPU::SI_SPILL_S384_SAVE:

1158 case AMDGPU::SI_SPILL_S384_RESTORE:

1159 case AMDGPU::SI_SPILL_V384_SAVE:

1160 case AMDGPU::SI_SPILL_V384_RESTORE:

1161 case AMDGPU::SI_SPILL_A384_SAVE:

1162 case AMDGPU::SI_SPILL_A384_RESTORE:

1163 case AMDGPU::SI_SPILL_AV384_SAVE:

1164 case AMDGPU::SI_SPILL_AV384_RESTORE:

1165 return 12;

1166 case AMDGPU::SI_SPILL_S352_SAVE:

1167 case AMDGPU::SI_SPILL_S352_RESTORE:

1168 case AMDGPU::SI_SPILL_V352_SAVE:

1169 case AMDGPU::SI_SPILL_V352_RESTORE:

1170 case AMDGPU::SI_SPILL_A352_SAVE:

1171 case AMDGPU::SI_SPILL_A352_RESTORE:

1172 case AMDGPU::SI_SPILL_AV352_SAVE:

1173 case AMDGPU::SI_SPILL_AV352_RESTORE:

1174 return 11;

1175 case AMDGPU::SI_SPILL_S320_SAVE:

1176 case AMDGPU::SI_SPILL_S320_RESTORE:

1177 case AMDGPU::SI_SPILL_V320_SAVE:

1178 case AMDGPU::SI_SPILL_V320_RESTORE:

1179 case AMDGPU::SI_SPILL_A320_SAVE:

1180 case AMDGPU::SI_SPILL_A320_RESTORE:

1181 case AMDGPU::SI_SPILL_AV320_SAVE:

1182 case AMDGPU::SI_SPILL_AV320_RESTORE:

1183 return 10;

1184 case AMDGPU::SI_SPILL_S288_SAVE:

1185 case AMDGPU::SI_SPILL_S288_RESTORE:

1186 case AMDGPU::SI_SPILL_V288_SAVE:

1187 case AMDGPU::SI_SPILL_V288_RESTORE:

1188 case AMDGPU::SI_SPILL_A288_SAVE:

1189 case AMDGPU::SI_SPILL_A288_RESTORE:

1190 case AMDGPU::SI_SPILL_AV288_SAVE:

1191 case AMDGPU::SI_SPILL_AV288_RESTORE:

1192 return 9;

1193 case AMDGPU::SI_SPILL_S256_SAVE:

1194 case AMDGPU::SI_SPILL_S256_RESTORE:

1195 case AMDGPU::SI_SPILL_V256_SAVE:

1196 case AMDGPU::SI_SPILL_V256_RESTORE:

1197 case AMDGPU::SI_SPILL_A256_SAVE:

1198 case AMDGPU::SI_SPILL_A256_RESTORE:

1199 case AMDGPU::SI_SPILL_AV256_SAVE:

1200 case AMDGPU::SI_SPILL_AV256_RESTORE:

1201 return 8;

1202 case AMDGPU::SI_SPILL_S224_SAVE:

1203 case AMDGPU::SI_SPILL_S224_RESTORE:

1204 case AMDGPU::SI_SPILL_V224_SAVE:

1205 case AMDGPU::SI_SPILL_V224_RESTORE:

1206 case AMDGPU::SI_SPILL_A224_SAVE:

1207 case AMDGPU::SI_SPILL_A224_RESTORE:

1208 case AMDGPU::SI_SPILL_AV224_SAVE:

1209 case AMDGPU::SI_SPILL_AV224_RESTORE:

1210 return 7;

1211 case AMDGPU::SI_SPILL_S192_SAVE:

1212 case AMDGPU::SI_SPILL_S192_RESTORE:

1213 case AMDGPU::SI_SPILL_V192_SAVE:

1214 case AMDGPU::SI_SPILL_V192_RESTORE:

1215 case AMDGPU::SI_SPILL_A192_SAVE:

1216 case AMDGPU::SI_SPILL_A192_RESTORE:

1217 case AMDGPU::SI_SPILL_AV192_SAVE:

1218 case AMDGPU::SI_SPILL_AV192_RESTORE:

1219 return 6;

1220 case AMDGPU::SI_SPILL_S160_SAVE:

1221 case AMDGPU::SI_SPILL_S160_RESTORE:

1222 case AMDGPU::SI_SPILL_V160_SAVE:

1223 case AMDGPU::SI_SPILL_V160_RESTORE:

1224 case AMDGPU::SI_SPILL_A160_SAVE:

1225 case AMDGPU::SI_SPILL_A160_RESTORE:

1226 case AMDGPU::SI_SPILL_AV160_SAVE:

1227 case AMDGPU::SI_SPILL_AV160_RESTORE:

1228 return 5;

1229 case AMDGPU::SI_SPILL_S128_SAVE:

1230 case AMDGPU::SI_SPILL_S128_RESTORE:

1231 case AMDGPU::SI_SPILL_V128_SAVE:

1232 case AMDGPU::SI_SPILL_V128_RESTORE:

1233 case AMDGPU::SI_SPILL_A128_SAVE:

1234 case AMDGPU::SI_SPILL_A128_RESTORE:

1235 case AMDGPU::SI_SPILL_AV128_SAVE:

1236 case AMDGPU::SI_SPILL_AV128_RESTORE:

1237 return 4;

1238 case AMDGPU::SI_SPILL_S96_SAVE:

1239 case AMDGPU::SI_SPILL_S96_RESTORE:

1240 case AMDGPU::SI_SPILL_V96_SAVE:

1241 case AMDGPU::SI_SPILL_V96_RESTORE:

1242 case AMDGPU::SI_SPILL_A96_SAVE:

1243 case AMDGPU::SI_SPILL_A96_RESTORE:

1244 case AMDGPU::SI_SPILL_AV96_SAVE:

1245 case AMDGPU::SI_SPILL_AV96_RESTORE:

1246 return 3;

1247 case AMDGPU::SI_SPILL_S64_SAVE:

1248 case AMDGPU::SI_SPILL_S64_RESTORE:

1249 case AMDGPU::SI_SPILL_V64_SAVE:

1250 case AMDGPU::SI_SPILL_V64_RESTORE:

1251 case AMDGPU::SI_SPILL_A64_SAVE:

1252 case AMDGPU::SI_SPILL_A64_RESTORE:

1253 case AMDGPU::SI_SPILL_AV64_SAVE:

1254 case AMDGPU::SI_SPILL_AV64_RESTORE:

1255 return 2;

1256 case AMDGPU::SI_SPILL_S32_SAVE:

1257 case AMDGPU::SI_SPILL_S32_RESTORE:

1258 case AMDGPU::SI_SPILL_V32_SAVE:

1259 case AMDGPU::SI_SPILL_V32_RESTORE:

1260 case AMDGPU::SI_SPILL_A32_SAVE:

1261 case AMDGPU::SI_SPILL_A32_RESTORE:

1262 case AMDGPU::SI_SPILL_AV32_SAVE:

1263 case AMDGPU::SI_SPILL_AV32_RESTORE:

1264 case AMDGPU::SI_SPILL_WWM_V32_SAVE:

1265 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:

1266 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:

1267 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:

1268 case AMDGPU::SI_SPILL_V16_SAVE:

1269 case AMDGPU::SI_SPILL_V16_RESTORE:

1270 return 1;

1272 }

1273}
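// Added note: the returned value is the spill width in 32-bit sub-registers,
// i.e. the register bit width divided by 32. For example SI_SPILL_V256_SAVE
// covers 256 bits and yields 8, while the 32-bit and 16-bit spill pseudos
// occupy a single lane and yield 1; the block-spill pseudos instead count
// the set bits of their mask operand.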

1274

1276 switch (Opc) {

1277 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:

1278 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;

1279 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:

1280 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;

1281 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:

1282 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;

1283 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:

1284 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;

1285 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:

1286 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;

1287 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:

1288 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;

1289 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:

1290 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;

1291 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:

1292 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;

1293 default:

1294 return -1;

1295 }

1296}

1297

1299 switch (Opc) {

1300 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:

1301 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

1302 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:

1303 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;

1304 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:

1305 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;

1306 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:

1307 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;

1308 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:

1309 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;

1310 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:

1311 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;

1312 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:

1313 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;

1314 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:

1315 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;

1316 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:

1317 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;

1318 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:

1319 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;

1320 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:

1321 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;

1322 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:

1323 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;

1324 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:

1325 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;

1326 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:

1327 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;

1328 default:

1329 return -1;

1330 }

1331}

1332

1334 switch (Opc) {

1335 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:

1336 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;

1337 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:

1338 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;

1339 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:

1340 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;

1341 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:

1342 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;

1343 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:

1344 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;

1345 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:

1346 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;

1347 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:

1348 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;

1349 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:

1350 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;

1351 default:

1352 return -1;

1353 }

1354}

1355

1357 switch (Opc) {

1358 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:

1359 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;

1360 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:

1361 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;

1362 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:

1363 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;

1364 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:

1365 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;

1366 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:

1367 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;

1368 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:

1369 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;

1370 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:

1371 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;

1372 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:

1373 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;

1374 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:

1375 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;

1376 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:

1377 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;

1378 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:

1379 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;

1380 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:

1381 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;

1382 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:

1383 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;

1384 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:

1385 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;

1386 default:

1387 return -1;

1388 }

1389}
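// Added note: these four helpers translate between the register-offset
// (OFFEN) and immediate-offset (OFFSET) MUBUF forms in both directions, for
// example BUFFER_STORE_DWORD_OFFEN <-> BUFFER_STORE_DWORD_OFFSET. A return
// value of -1 means the opcode has no equivalent in the other addressing
// form.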

1390

1394 int Index, unsigned Lane,

1395 unsigned ValueReg, bool IsKill) {

1399

1401

1402 if (Reg == AMDGPU::NoRegister)

1404

1405 bool IsStore = MI->mayStore();

1407 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());

1408

1409 unsigned Dst = IsStore ? Reg : ValueReg;

1410 unsigned Src = IsStore ? ValueReg : Reg;

1411 bool IsVGPR = TRI->isVGPR(MRI, Reg);

1413 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {


1418 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)

1421 return CopyMIB;

1422 }

1423 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64

1424 : AMDGPU::V_ACCVGPR_READ_B32_e64;

1425

1429 return MIB;

1430}


1437 int Index,

1442 bool IsStore = MI->mayStore();

1443

1444 unsigned Opc = MI->getOpcode();

1445 int LoadStoreOp = IsStore ?

1447 if (LoadStoreOp == -1)

1448 return false;

1449

1452 return true;

1453

1457 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))

1458 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))

1463

1465 AMDGPU::OpName::vdata_in);

1466 if (VDataIn)

1467 NewMI.add(*VDataIn);

1468 return true;

1469}

1470

1472 unsigned LoadStoreOp,

1473 unsigned EltSize) {

1474 bool IsStore = TII->get(LoadStoreOp).mayStore();

1476 bool UseST =

1478

1479

1480 if (TII->isBlockLoadStore(LoadStoreOp))

1481 return LoadStoreOp;

1482

1483 switch (EltSize) {

1484 case 4:

1485 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR

1486 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;

1487 break;

1488 case 8:

1489 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR

1490 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;

1491 break;

1492 case 12:

1493 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR

1494 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;

1495 break;

1496 case 16:

1497 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR

1498 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;

1499 break;

1500 default:

1502 }

1503

1504 if (HasVAddr)

1506 else if (UseST)

1508

1509 return LoadStoreOp;

1510}
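// Added sketch of the selection above: the element size in bytes (4, 8, 12
// or 16) picks the SCRATCH_*_DWORD, DWORDX2, DWORDX3 or DWORDX4 SADDR
// opcode, which is then rewritten to the VADDR form when a VGPR offset is in
// use, or to the ST form when flat-scratch ST mode applies; block
// loads/stores keep their original opcode. The exact rewrite helpers are not
// visible in this listing.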

1511

1514 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,

1517 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

1518

1523

1525 bool IsStore = Desc->mayStore();

1526 bool IsFlat = TII->isFLATScratch(LoadStoreOp);

1527 bool IsBlock = TII->isBlockLoadStore(LoadStoreOp);

1528

1529 bool CanClobberSCC = false;

1530 bool Scavenged = false;

1531 MCRegister SOffset = ScratchOffsetReg;

1532

1534

1535 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);


1541 unsigned EltSize = IsBlock ? RegWidth

1542 : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u)

1543 : 4u;

1544 unsigned NumSubRegs = RegWidth / EltSize;

1545 unsigned Size = NumSubRegs * EltSize;

1546 unsigned RemSize = RegWidth - Size;

1547 unsigned NumRemSubRegs = RemSize ? 1 : 0;

1549 int64_t MaterializedOffset = Offset;

1550

1551 int64_t MaxOffset = Offset + Size + RemSize - EltSize;

1552 int64_t ScratchOffsetRegDelta = 0;

1553

1554 if (IsFlat && EltSize > 4) {

1556 Desc = &TII->get(LoadStoreOp);

1557 }

1558

1561

1562 assert((IsFlat || ((Offset % EltSize) == 0)) &&

1563 "unexpected VGPR spill offset");


1569 Register TmpIntermediateVGPR;

1570 bool UseVGPROffset = false;


1574 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,

1575 int64_t VOffset) {

1576

1577 if (IsFlat && SGPRBase) {


1582 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {

1586 .addImm(0);

1587 } else {

1592 .addReg(TmpOffsetVGPR);

1593 }

1594 } else {

1595 assert(TmpOffsetVGPR);

1598 }

1599 };

1600

1601 bool IsOffsetLegal =

1604 : TII->isLegalMUBUFImmOffset(MaxOffset);

1605 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {


1612 if (RS) {

1613 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);

1614

1615

1616 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);

1617 } else if (LiveUnits) {

1618 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);

1619 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {

1621 SOffset = Reg;

1622 break;

1623 }

1624 }

1625 }

1626

1627 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)

1629

1630 if (!SOffset) {

1631 UseVGPROffset = true;

1632

1633 if (RS) {

1634 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);

1635 } else {

1637 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {

1639 TmpOffsetVGPR = Reg;

1640 break;

1641 }

1642 }

1643 }

1644

1645 assert(TmpOffsetVGPR);

1646 } else if (!SOffset && CanClobberSCC) {


1657 if (!ScratchOffsetReg)

1659 SOffset = ScratchOffsetReg;

1660 ScratchOffsetRegDelta = Offset;

1661 } else {

1662 Scavenged = true;

1663 }


1668 if (!IsFlat && !UseVGPROffset)

1669 Offset *= ST.getWavefrontSize();

1670

1671 if (!UseVGPROffset && !SOffset)

1672 report_fatal_error("could not scavenge SGPR to spill in entry function");

1673

1674 if (UseVGPROffset) {

1675

1676 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);

1677 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {

1679 } else {

1682 .addReg(ScratchOffsetReg)

1684 Add->getOperand(3).setIsDead();

1685 }

1686

1688 }

1689

1690 if (IsFlat && SOffset == AMDGPU::NoRegister) {

1691 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0

1692 && "Unexpected vaddr for flat scratch with a FI operand");

1693

1694 if (UseVGPROffset) {

1696 } else {

1697 assert(ST.hasFlatScratchSTMode());

1698 assert(TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST");

1700 }

1701

1702 Desc = &TII->get(LoadStoreOp);

1703 }

1704

1705 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;

1706 ++i, RegOffset += EltSize) {

1707 if (i == NumSubRegs) {

1708 EltSize = RemSize;

1710 }

1711 Desc = &TII->get(LoadStoreOp);

1712

1713 if (!IsFlat && UseVGPROffset) {

1716 Desc = &TII->get(NewLoadStoreOp);

1717 }

1718

1719 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {


1726 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);

1727 }

1728

1729 unsigned NumRegs = EltSize / 4;

1731 ? ValueReg

1732 : Register(getSubReg(ValueReg,

1734

1735 unsigned SOffsetRegState = 0;

1737 const bool IsLastSubReg = i + 1 == e;

1738 const bool IsFirstSubReg = i == 0;

1739 if (IsLastSubReg) {

1741

1743 }


1747 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;

1748 bool NeedSuperRegImpOperand = e > 1;


1752 unsigned RemEltSize = EltSize;


1760 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,

1761 LaneE = RegOffset / 4;

1762 Lane >= LaneE; --Lane) {

1763 bool IsSubReg = e > 1 || EltSize > 4;

1766 : ValueReg;

1768 if (!MIB.getInstr())

1769 break;

1770 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {

1772 NeedSuperRegDef = false;

1773 }

1774 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {

1775 NeedSuperRegImpOperand = true;

1776 unsigned State = SrcDstRegState;

1777 if (!IsLastSubReg || (Lane != LaneE))

1779 if (!IsFirstSubReg || (Lane != LaneS))

1782 }

1783 RemEltSize -= 4;

1784 }

1785

1786 if (!RemEltSize)

1787 continue;

1788

1789 if (RemEltSize != EltSize) {

1790 assert(IsFlat && EltSize > 4);

1791

1792 unsigned NumRegs = RemEltSize / 4;

1797 }

1798

1799 unsigned FinalReg = SubReg;

1800

1801 if (IsAGPR) {

1802 assert(EltSize == 4);

1803

1804 if (!TmpIntermediateVGPR) {

1807 }

1808 if (IsStore) {

1810 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),

1811 TmpIntermediateVGPR)

1813 if (NeedSuperRegDef)

1815 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))

1818 }

1819 SubReg = TmpIntermediateVGPR;

1820 } else if (UseVGPROffset) {

1821 if (!TmpOffsetVGPR) {

1822 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,

1823 MI, false, 0);

1824 RS->setRegUsed(TmpOffsetVGPR);

1825 }

1826 }

1827

1828 Register FinalValueReg = ValueReg;

1829 if (LoadStoreOp == AMDGPU::SCRATCH_LOAD_USHORT_SADDR) {

1830

1831

1832 ValueReg =

1833 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);

1835 IsKill = false;

1836 }

1837

1842

1843 auto MIB =

1846

1847 if (UseVGPROffset) {

1848

1849

1850 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));

1851 }

1852

1853 if (!IsFlat)

1855

1856 if (SOffset == AMDGPU::NoRegister) {

1857 if (!IsFlat) {

1858 if (UseVGPROffset && ScratchOffsetReg) {

1859 MIB.addReg(ScratchOffsetReg);

1860 } else {

1862 MIB.addImm(0);

1863 }

1864 }

1865 } else {

1866 MIB.addReg(SOffset, SOffsetRegState);

1867 }


1874 if (!IsFlat)

1875 MIB.addImm(0);

1876 MIB.addMemOperand(NewMMO);

1877

1878 if (FinalValueReg != ValueReg) {

1879

1880 ValueReg = getSubReg(ValueReg, AMDGPU::lo16);

1886 ValueReg = FinalValueReg;

1887 }

1888

1889 if (!IsAGPR && NeedSuperRegDef)

1891

1892 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {

1893 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),

1894 FinalReg)

1897 }

1898

1900 bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore;

1901 if (NeedSuperRegImpOperand &&

1902 (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) {

1904 if (PartialReloadCopy)

1906 }


1929 if (!IsStore && MI != MBB.end() && MI->isReturn() &&

1930 MI->readsRegister(SubReg, this)) {

1932 MIB->tieOperands(0, MIB->getNumOperands() - 1);

1933 }


1940 if (!IsStore && TII->isBlockLoadStore(LoadStoreOp))

1942 }

1943

1944 if (ScratchOffsetRegDelta != 0) {

1945

1948 .addImm(-ScratchOffsetRegDelta);

1949 }

1950}
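// Worked example of the splitting arithmetic in buildSpillLoadStore (added
// for clarity): spilling a 160-bit (20-byte) VGPR tuple with flat scratch
// and no AGPRs gives EltSize = min(20, 16) = 16, NumSubRegs = 20 / 16 = 1,
// Size = 16 and RemSize = 4, so one DWORDX4 access is emitted plus one DWORD
// access for the 4-byte remainder. With MUBUF scratch EltSize is fixed at 4
// and the same spill becomes five DWORD accesses, one per 32-bit
// sub-register.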

1951

1957 Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);

1958 for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)

1959 if (!(Mask & (1 << RegOffset)) &&

1960 isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))

1962}

1963

1965 int Offset, bool IsLoad,

1966 bool IsKill) const {


1982 if (IsLoad) {

1983 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR

1984 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

1987 } else {

1988 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR

1989 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

1992

1994 }

1995}

1996

2000 bool SpillToPhysVGPRLane) const {

2001 assert(MI->getOperand(0).isUndef() &&

2002 "undef spill should have been deleted earlier");

2003

2004 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

2005

2009 bool SpillToVGPR = !VGPRSpills.empty();

2010 if (OnlyToVGPR && !SpillToVGPR)

2011 return false;

2012

2015

2016 if (SpillToVGPR) {


2023 "Num of SGPRs spilled should be less than or equal to num of "

2024 "the VGPR lanes.");

2025

2026 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

2032

2033 bool IsFirstSubreg = i == 0;

2034 bool IsLastSubreg = i == SB.NumSubRegs - 1;

2035 bool UseKill = SB.IsKill && IsLastSubreg;


2041 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)

2044 .addReg(Spill.VGPR);

2045 if (Indexes) {

2046 if (IsFirstSubreg)

2048 else

2050 }

2051

2052 if (IsFirstSubreg && SB.NumSubRegs > 1) {

2053

2054

2056 }

2057

2058 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))

2060

2061

2062

2063

2064 }

2065 } else {


2078 for (unsigned i = Offset * PVD.PerVGPR,

2080 i < e; ++i) {

2085

2088 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)

2090 .addImm(i % PVD.PerVGPR)

2092 TmpVGPRFlags = 0;

2093

2094 if (Indexes) {

2095 if (i == 0)

2097 else

2099 }


2105 unsigned SuperKillState = 0;

2109 }

2110 }

2111

2112

2114 }

2115

2117 }

2118

2119 MI->eraseFromParent();

2121

2122 if (LIS)

2124

2125 return true;

2126}
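// Added note: when VGPR lanes have been assigned to this frame index, each
// 32-bit sub-register of the SGPR is written into its own lane with
// SI_SPILL_S32_TO_VGPR; otherwise the lanes are packed into a temporary VGPR
// that buildVGPRSpillLoadStore then writes out to the scratch slot.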

2127

2131 bool SpillToPhysVGPRLane) const {

2132 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

2133

2137 bool SpillToVGPR = !VGPRSpills.empty();

2138 if (OnlyToVGPR && !SpillToVGPR)

2139 return false;

2140

2141 if (SpillToVGPR) {

2142 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

2147

2150 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

2152 .addImm(Spill.Lane);

2155 if (Indexes) {

2156 if (i == e - 1)

2158 else

2160 }

2161 }

2162 } else {


2173 for (unsigned i = Offset * PVD.PerVGPR,

2175 i < e; ++i) {

2180

2181 bool LastSubReg = (i + 1 == e);

2183 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

2188 if (Indexes) {

2189 if (i == e - 1)

2191 else

2193 }

2194 }

2195 }

2196

2198 }

2199

2200 MI->eraseFromParent();

2201

2202 if (LIS)

2204

2205 return true;

2206}
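// Added note: the restore path mirrors the spill path above. Each 32-bit
// sub-register is read back either from its dedicated VGPR lane with
// SI_RESTORE_S32_FROM_VGPR or, for spills that went to memory, by reloading
// the temporary VGPR from the scratch slot and then extracting the lanes.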

2207

2211 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,

2212 RS);

2214

2219

2220 for (unsigned i = Offset * PVD.PerVGPR,

2222 i < e; ++i) {

2227

2229 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),

2232 .addImm(i % PVD.PerVGPR)

2234 TmpVGPRFlags = 0;


2239 unsigned SuperKillState = 0;

2243 }

2244 }

2245

2246 }

2247

2248

2249 MI = RestoreMBB.end();

2250 SB.setMI(&RestoreMBB, MI);


2255 for (unsigned i = Offset * PVD.PerVGPR,

2257 i < e; ++i) {

2262

2264 bool LastSubReg = (i + 1 == e);

2265 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),

2271 }

2272 }

2274

2276 return false;

2277}


2285 switch (MI->getOpcode()) {

2286 case AMDGPU::SI_SPILL_S1024_SAVE:

2287 case AMDGPU::SI_SPILL_S512_SAVE:

2288 case AMDGPU::SI_SPILL_S384_SAVE:

2289 case AMDGPU::SI_SPILL_S352_SAVE:

2290 case AMDGPU::SI_SPILL_S320_SAVE:

2291 case AMDGPU::SI_SPILL_S288_SAVE:

2292 case AMDGPU::SI_SPILL_S256_SAVE:

2293 case AMDGPU::SI_SPILL_S224_SAVE:

2294 case AMDGPU::SI_SPILL_S192_SAVE:

2295 case AMDGPU::SI_SPILL_S160_SAVE:

2296 case AMDGPU::SI_SPILL_S128_SAVE:

2297 case AMDGPU::SI_SPILL_S96_SAVE:

2298 case AMDGPU::SI_SPILL_S64_SAVE:

2299 case AMDGPU::SI_SPILL_S32_SAVE:

2300 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);

2301 case AMDGPU::SI_SPILL_S1024_RESTORE:

2302 case AMDGPU::SI_SPILL_S512_RESTORE:

2303 case AMDGPU::SI_SPILL_S384_RESTORE:

2304 case AMDGPU::SI_SPILL_S352_RESTORE:

2305 case AMDGPU::SI_SPILL_S320_RESTORE:

2306 case AMDGPU::SI_SPILL_S288_RESTORE:

2307 case AMDGPU::SI_SPILL_S256_RESTORE:

2308 case AMDGPU::SI_SPILL_S224_RESTORE:

2309 case AMDGPU::SI_SPILL_S192_RESTORE:

2310 case AMDGPU::SI_SPILL_S160_RESTORE:

2311 case AMDGPU::SI_SPILL_S128_RESTORE:

2312 case AMDGPU::SI_SPILL_S96_RESTORE:

2313 case AMDGPU::SI_SPILL_S64_RESTORE:

2314 case AMDGPU::SI_SPILL_S32_RESTORE:

2315 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);

2316 default:

2318 }

2319}

2320

2322 int SPAdj, unsigned FIOperandNum,

2330

2331 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");

2332

2334 "unreserved scratch RSRC register");

2335

2337 int Index = MI->getOperand(FIOperandNum).getIndex();

2338

2342

2343 switch (MI->getOpcode()) {

2344

2345 case AMDGPU::SI_SPILL_S1024_SAVE:

2346 case AMDGPU::SI_SPILL_S512_SAVE:

2347 case AMDGPU::SI_SPILL_S384_SAVE:

2348 case AMDGPU::SI_SPILL_S352_SAVE:

2349 case AMDGPU::SI_SPILL_S320_SAVE:

2350 case AMDGPU::SI_SPILL_S288_SAVE:

2351 case AMDGPU::SI_SPILL_S256_SAVE:

2352 case AMDGPU::SI_SPILL_S224_SAVE:

2353 case AMDGPU::SI_SPILL_S192_SAVE:

2354 case AMDGPU::SI_SPILL_S160_SAVE:

2355 case AMDGPU::SI_SPILL_S128_SAVE:

2356 case AMDGPU::SI_SPILL_S96_SAVE:

2357 case AMDGPU::SI_SPILL_S64_SAVE:

2358 case AMDGPU::SI_SPILL_S32_SAVE: {

2360 }

2361

2362

2363 case AMDGPU::SI_SPILL_S1024_RESTORE:

2364 case AMDGPU::SI_SPILL_S512_RESTORE:

2365 case AMDGPU::SI_SPILL_S384_RESTORE:

2366 case AMDGPU::SI_SPILL_S352_RESTORE:

2367 case AMDGPU::SI_SPILL_S320_RESTORE:

2368 case AMDGPU::SI_SPILL_S288_RESTORE:

2369 case AMDGPU::SI_SPILL_S256_RESTORE:

2370 case AMDGPU::SI_SPILL_S224_RESTORE:

2371 case AMDGPU::SI_SPILL_S192_RESTORE:

2372 case AMDGPU::SI_SPILL_S160_RESTORE:

2373 case AMDGPU::SI_SPILL_S128_RESTORE:

2374 case AMDGPU::SI_SPILL_S96_RESTORE:

2375 case AMDGPU::SI_SPILL_S64_RESTORE:

2376 case AMDGPU::SI_SPILL_S32_RESTORE: {

2378 }

2379

2380

2381 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {

2382

2383 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),

2384 AMDGPU::M0)

2385 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));

2386 [[fallthrough]];

2387 }

2388 case AMDGPU::SI_SPILL_V1024_SAVE:

2389 case AMDGPU::SI_SPILL_V512_SAVE:

2390 case AMDGPU::SI_SPILL_V384_SAVE:

2391 case AMDGPU::SI_SPILL_V352_SAVE:

2392 case AMDGPU::SI_SPILL_V320_SAVE:

2393 case AMDGPU::SI_SPILL_V288_SAVE:

2394 case AMDGPU::SI_SPILL_V256_SAVE:

2395 case AMDGPU::SI_SPILL_V224_SAVE:

2396 case AMDGPU::SI_SPILL_V192_SAVE:

2397 case AMDGPU::SI_SPILL_V160_SAVE:

2398 case AMDGPU::SI_SPILL_V128_SAVE:

2399 case AMDGPU::SI_SPILL_V96_SAVE:

2400 case AMDGPU::SI_SPILL_V64_SAVE:

2401 case AMDGPU::SI_SPILL_V32_SAVE:

2402 case AMDGPU::SI_SPILL_V16_SAVE:

2403 case AMDGPU::SI_SPILL_A1024_SAVE:

2404 case AMDGPU::SI_SPILL_A512_SAVE:

2405 case AMDGPU::SI_SPILL_A384_SAVE:

2406 case AMDGPU::SI_SPILL_A352_SAVE:

2407 case AMDGPU::SI_SPILL_A320_SAVE:

2408 case AMDGPU::SI_SPILL_A288_SAVE:

2409 case AMDGPU::SI_SPILL_A256_SAVE:

2410 case AMDGPU::SI_SPILL_A224_SAVE:

2411 case AMDGPU::SI_SPILL_A192_SAVE:

2412 case AMDGPU::SI_SPILL_A160_SAVE:

2413 case AMDGPU::SI_SPILL_A128_SAVE:

2414 case AMDGPU::SI_SPILL_A96_SAVE:

2415 case AMDGPU::SI_SPILL_A64_SAVE:

2416 case AMDGPU::SI_SPILL_A32_SAVE:

2417 case AMDGPU::SI_SPILL_AV1024_SAVE:

2418 case AMDGPU::SI_SPILL_AV512_SAVE:

2419 case AMDGPU::SI_SPILL_AV384_SAVE:

2420 case AMDGPU::SI_SPILL_AV352_SAVE:

2421 case AMDGPU::SI_SPILL_AV320_SAVE:

2422 case AMDGPU::SI_SPILL_AV288_SAVE:

2423 case AMDGPU::SI_SPILL_AV256_SAVE:

2424 case AMDGPU::SI_SPILL_AV224_SAVE:

2425 case AMDGPU::SI_SPILL_AV192_SAVE:

2426 case AMDGPU::SI_SPILL_AV160_SAVE:

2427 case AMDGPU::SI_SPILL_AV128_SAVE:

2428 case AMDGPU::SI_SPILL_AV96_SAVE:

2429 case AMDGPU::SI_SPILL_AV64_SAVE:

2430 case AMDGPU::SI_SPILL_AV32_SAVE:

2431 case AMDGPU::SI_SPILL_WWM_V32_SAVE:

2432 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {

2434 AMDGPU::OpName::vdata);

2436 MI->eraseFromParent();

2437 return true;

2438 }

2439

2440 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

2442

2443 unsigned Opc;

2444 if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {

2445 assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");

2446 Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;

2447 } else {

2448 Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE

2449 ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR

2450 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR

2451 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

2452 }

2453

2454 auto *MBB = MI->getParent();

2455 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());

2456 if (IsWWMRegSpill) {

2458 RS->isRegUsed(AMDGPU::SCC));

2459 }

2462 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),

2463 *MI->memoperands_begin(), RS);

2465 if (IsWWMRegSpill)

2467

2468 MI->eraseFromParent();

2469 return true;

2470 }

2471 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {

2472

2473 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),

2474 AMDGPU::M0)

2475 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));

2476 [[fallthrough]];

2477 }

2478 case AMDGPU::SI_SPILL_V16_RESTORE:

2479 case AMDGPU::SI_SPILL_V32_RESTORE:

2480 case AMDGPU::SI_SPILL_V64_RESTORE:

2481 case AMDGPU::SI_SPILL_V96_RESTORE:

2482 case AMDGPU::SI_SPILL_V128_RESTORE:

2483 case AMDGPU::SI_SPILL_V160_RESTORE:

2484 case AMDGPU::SI_SPILL_V192_RESTORE:

2485 case AMDGPU::SI_SPILL_V224_RESTORE:

2486 case AMDGPU::SI_SPILL_V256_RESTORE:

2487 case AMDGPU::SI_SPILL_V288_RESTORE:

2488 case AMDGPU::SI_SPILL_V320_RESTORE:

2489 case AMDGPU::SI_SPILL_V352_RESTORE:

2490 case AMDGPU::SI_SPILL_V384_RESTORE:

2491 case AMDGPU::SI_SPILL_V512_RESTORE:

2492 case AMDGPU::SI_SPILL_V1024_RESTORE:

2493 case AMDGPU::SI_SPILL_A32_RESTORE:

2494 case AMDGPU::SI_SPILL_A64_RESTORE:

2495 case AMDGPU::SI_SPILL_A96_RESTORE:

2496 case AMDGPU::SI_SPILL_A128_RESTORE:

2497 case AMDGPU::SI_SPILL_A160_RESTORE:

2498 case AMDGPU::SI_SPILL_A192_RESTORE:

2499 case AMDGPU::SI_SPILL_A224_RESTORE:

2500 case AMDGPU::SI_SPILL_A256_RESTORE:

2501 case AMDGPU::SI_SPILL_A288_RESTORE:

2502 case AMDGPU::SI_SPILL_A320_RESTORE:

2503 case AMDGPU::SI_SPILL_A352_RESTORE:

2504 case AMDGPU::SI_SPILL_A384_RESTORE:

2505 case AMDGPU::SI_SPILL_A512_RESTORE:

2506 case AMDGPU::SI_SPILL_A1024_RESTORE:

2507 case AMDGPU::SI_SPILL_AV32_RESTORE:

2508 case AMDGPU::SI_SPILL_AV64_RESTORE:

2509 case AMDGPU::SI_SPILL_AV96_RESTORE:

2510 case AMDGPU::SI_SPILL_AV128_RESTORE:

2511 case AMDGPU::SI_SPILL_AV160_RESTORE:

2512 case AMDGPU::SI_SPILL_AV192_RESTORE:

2513 case AMDGPU::SI_SPILL_AV224_RESTORE:

2514 case AMDGPU::SI_SPILL_AV256_RESTORE:

2515 case AMDGPU::SI_SPILL_AV288_RESTORE:

2516 case AMDGPU::SI_SPILL_AV320_RESTORE:

2517 case AMDGPU::SI_SPILL_AV352_RESTORE:

2518 case AMDGPU::SI_SPILL_AV384_RESTORE:

2519 case AMDGPU::SI_SPILL_AV512_RESTORE:

2520 case AMDGPU::SI_SPILL_AV1024_RESTORE:

2521 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:

2522 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {

2524 AMDGPU::OpName::vdata);

2525 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

2527

2528 unsigned Opc;

2529 if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {

2530 assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");

2531 Opc = ST.d16PreservesUnusedBits()

2532 ? AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16

2533 : AMDGPU::SCRATCH_LOAD_USHORT_SADDR;

2534 } else {

2535 Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE

2536 ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR

2537 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR

2538 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

2539 }

2540

2541 auto *MBB = MI->getParent();

2542 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());

2543 if (IsWWMRegSpill) {

2545 RS->isRegUsed(AMDGPU::SCC));

2546 }

2547

2550 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),

2551 *MI->memoperands_begin(), RS);

2552

2553 if (IsWWMRegSpill)

2555

2556 MI->eraseFromParent();

2557 return true;

2558 }

2559 case AMDGPU::V_ADD_U32_e32:

2560 case AMDGPU::V_ADD_U32_e64:

2561 case AMDGPU::V_ADD_CO_U32_e32:

2562 case AMDGPU::V_ADD_CO_U32_e64: {

2563

2564 unsigned NumDefs = MI->getNumExplicitDefs();

2565 unsigned Src0Idx = NumDefs;

2566

2567 bool HasClamp = false;

2569

2570 switch (MI->getOpcode()) {

2571 case AMDGPU::V_ADD_U32_e32:

2572 break;

2573 case AMDGPU::V_ADD_U32_e64:

2574 HasClamp = MI->getOperand(3).getImm();

2575 break;

2576 case AMDGPU::V_ADD_CO_U32_e32:

2577 VCCOp = &MI->getOperand(3);

2578 break;

2579 case AMDGPU::V_ADD_CO_U32_e64:

2580 VCCOp = &MI->getOperand(1);

2581 HasClamp = MI->getOperand(4).getImm();

2582 break;

2583 default:

2584 break;

2585 }

2586 bool DeadVCC = !VCCOp || VCCOp->isDead();

2589

2590 unsigned OtherOpIdx =

2591 FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;

2593

2594 unsigned Src1Idx = Src0Idx + 1;

2595 Register MaterializedReg = FrameReg;

2597

2598 int64_t Offset = FrameInfo.getObjectOffset(Index);


2602 if (OtherOp->isImm()) {

2603 int64_t TotalOffset = OtherOp->getImm() + Offset;


2610 break;

2611 }

2612

2613 OtherOp->setImm(TotalOffset);

2615 }

2616

2617 if (FrameReg && !ST.enableFlatScratch()) {


2625 ScavengedVGPR = RS->scavengeRegisterBackwards(

2626 AMDGPU::VGPR_32RegClass, MI, false, 0);


2632 .addImm(ST.getWavefrontSizeLog2())

2634 MaterializedReg = ScavengedVGPR;

2635 }

2636

2637 if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {

2638 if (ST.enableFlatScratch() &&

2639 TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {


2646 if (!ScavengedVGPR) {

2647 ScavengedVGPR = RS->scavengeRegisterBackwards(

2648 AMDGPU::VGPR_32RegClass, MI, false,

2649 0);

2650 }

2651

2652 assert(ScavengedVGPR != DstReg);

2653

2654 BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)

2655 .addReg(MaterializedReg,

2657 MaterializedReg = ScavengedVGPR;

2658 }


2665 if (NumDefs == 2)

2666 AddI32.add(MI->getOperand(1));

2667

2668 unsigned MaterializedRegFlags =

2670

2671 if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {

2672

2673

2674 AddI32

2675 .add(*OtherOp)

2676 .addReg(MaterializedReg, MaterializedRegFlags);

2677 } else {

2678

2679

2680 AddI32

2681 .addReg(MaterializedReg, MaterializedRegFlags)

2682 .add(*OtherOp);

2683 }

2684

2685 if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||

2686 MI->getOpcode() == AMDGPU::V_ADD_U32_e64)

2687 AddI32.addImm(0);

2688

2689 if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)

2690 AddI32.setOperandDead(3);

2691

2692 MaterializedReg = DstReg;

2693

2698 } else if (Offset != 0) {

2699 assert(!MaterializedReg);

2702 } else {

2703 if (DeadVCC && !HasClamp) {


2708 if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {

2709

2710 MI->eraseFromParent();

2711 return true;

2712 }

2713

2714

2715 MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));

2716 MI->removeOperand(FIOperandNum);

2717

2718 unsigned NumOps = MI->getNumOperands();

2719 for (unsigned I = NumOps - 2; I >= NumDefs + 1; --I)

2720 MI->removeOperand(I);

2721

2722 if (NumDefs == 2)

2723 MI->removeOperand(1);

2724

2725

2726 return true;

2727 }


2732 }

2733

2734

2735 if (TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {

2737 std::swap(FIOperandNum, OtherOpIdx);

2738 }


2743 for (unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {

2744 if (TII->isOperandLegal(*MI, SrcIdx)) {


2748 if (!ScavengedVGPR) {

2749 ScavengedVGPR = RS->scavengeRegisterBackwards(

2750 AMDGPU::VGPR_32RegClass, MI, false,

2751 0);

2752 }

2753

2754 assert(ScavengedVGPR != DstReg);

2755

2757 BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)

2758 .add(Src);

2759

2760 Src.ChangeToRegister(ScavengedVGPR, false);

2761 Src.setIsKill(true);

2762 break;

2763 }

2764 }

2765

2766

2767 if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {

2768 if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {

2770 }

2771

2772 MI->eraseFromParent();

2773 }

2774

2775 return true;

2776 }

2777 case AMDGPU::S_ADD_I32:

2778 case AMDGPU::S_ADD_U32: {

2779

2780 unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;

2782

2784

2787 Register MaterializedReg = FrameReg;

2788

2789

2790 bool DeadSCC = MI->getOperand(3).isDead();


2799 if (FrameReg && !ST.enableFlatScratch()) {


2803 if (!TmpReg)

2804 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,

2805 MI, false, 0,

2806 false);

2807 if (TmpReg) {

2811 .addImm(ST.getWavefrontSizeLog2())

2813 }

2814 MaterializedReg = TmpReg;

2815 }

2816

2817 int64_t Offset = FrameInfo.getObjectOffset(Index);


2822 if (OtherOp.isImm()) {

2825

2826 if (MaterializedReg)

2828 else

2830 } else if (MaterializedReg) {

2831

2833

2834 if (!TmpReg && MaterializedReg == FrameReg) {

2835 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,

2836 MI, false, 0,

2837 false);

2838 DstReg = TmpReg;

2839 }

2840

2841 if (TmpReg) {

2845 .add(OtherOp);

2846 if (DeadSCC)

2848

2849 MaterializedReg = DstReg;

2850

2854 }

2856 } else {

2857

2858

2860 }

2861

2862 if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {

2864 MI->removeOperand(3);

2865 MI->removeOperand(OtherOpIdx);

2866 MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));

2867 } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {

2869 MI->removeOperand(3);

2870 MI->removeOperand(FIOperandNum);

2871 MI->setDesc(

2872 TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));

2873 }

2874

2876 return true;

2877 }

2878 default: {

2879 break;

2880 }

2881 }

2882

2883 int64_t Offset = FrameInfo.getObjectOffset(Index);

2884 if (ST.enableFlatScratch()) {

2885 if (TII->isFLATScratch(*MI)) {

2887 (int16_t)FIOperandNum ==

2888 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));

2889

2890

2891 if (FrameReg)

2893

2895 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);

2899 OffsetOp->setImm(NewOffset);

2900 if (FrameReg)

2901 return false;

2903 }

2904

2906 unsigned Opc = MI->getOpcode();

2907 int NewOpc = -1;

2910 } else if (ST.hasFlatScratchSTMode()) {

2911

2912

2914 }

2915

2916 if (NewOpc != -1) {

2917

2918

2919 int VDstIn =

2920 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);

2921 bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&

2922 MI->getOperand(VDstIn).isTied();

2923 if (TiedVDst)

2924 MI->untieRegOperand(VDstIn);

2925

2926 MI->removeOperand(

2927 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));

2928

2929 if (TiedVDst) {

2930 int NewVDst =

2931 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);

2932 int NewVDstIn =

2933 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);

2934 assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");

2935 MI->tieOperands(NewVDst, NewVDstIn);

2936 }

2937 MI->setDesc(TII->get(NewOpc));

2938 return false;

2939 }

2940 }

2941 }

2942

2943 if (!FrameReg) {

2945 if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))

2946 return false;

2947 }

2948

2949

2950

2952 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);

2953

2954 if (Offset && FrameReg && UseSGPR) {

2955 FIOp->setReg(FrameReg);

2956 return false;

2957 }

2958

2960 UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;

2961

2963 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);

2964 FIOp->setReg(TmpReg);

2966

2967 if ((!FrameReg || Offset) && TmpReg) {

2968 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

2970 if (FrameReg)

2971 MIB.addReg(FrameReg);

2972 else

2974

2975 return false;

2976 }

2977

2978 bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&

2979 MI->definesRegister(AMDGPU::SCC, nullptr);

2980

2982 UseSGPR ? TmpReg

2983 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,

2984 MI, false, 0, !UseSGPR);

2985

2986 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) {

2988 if (ST.hasFlatScratchSVSMode() && SVOpcode != -1) {

2989 Register TmpVGPR = RS->scavengeRegisterBackwards(

2990 AMDGPU::VGPR_32RegClass, MI, false, 0, true);

2991

2992

2993 auto MIB =

2994 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR);

2995 if (FrameReg)

2996 MIB.addReg(FrameReg);

2997 else

2999

3000

3001 if (FrameReg && Offset)

3002 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), FrameReg)

3005

3007 .add(MI->getOperand(0))

3008 .addReg(TmpVGPR)

3009 .addImm(0)

3010 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::cpol));

3011 MI->eraseFromParent();

3012 return true;

3013 }

3015 }

3016

3017 if (!TmpSReg) {

3018

3019 TmpSReg = FrameReg;

3020 FIOp->setReg(FrameReg);

3022 }

3023

3024 if (NeedSaveSCC) {

3025 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");

3035 } else {

3039 }

3040

3041 if (!UseSGPR)

3044

3045 if (TmpSReg == FrameReg) {

3046

3047 if (NeedSaveSCC &&

3048 MI->registerDefIsDead(AMDGPU::SCC, nullptr)) {

3051 TmpSReg)

3054 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))

3057 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),

3058 TmpSReg)

3061 } else {

3063 FrameReg)

3066 }

3067 }

3068

3069 return false;

3070 }

3071

3072 bool IsMUBUF = TII->isMUBUF(*MI);

3073

3075

3076

3077 bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));

3078 bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&

3079 MI->definesRegister(AMDGPU::SCC, nullptr);

3081 ? &AMDGPU::SReg_32RegClass

3082 : &AMDGPU::VGPR_32RegClass;

3083 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||

3084 MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||

3085 MI->getOpcode() == AMDGPU::S_MOV_B32;

3087 IsCopy ? MI->getOperand(0).getReg()

3088 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);

3089

3090 int64_t Offset = FrameInfo.getObjectOffset(Index);

3092 unsigned OpCode =

3093 IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;

3094 Register TmpResultReg = ResultReg;

3095 if (IsSALU && LiveSCC) {

3096 TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,

3097 MI, false, 0);

3098 }

3099

3100 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);

3101 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)

3102

3103

3104 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);

3105 else

3106 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());

3107 if (IsSALU && !LiveSCC)

3108 Shift.getInstr()->getOperand(3).setIsDead();

3109 if (IsSALU && LiveSCC) {

3111 if (IsCopy) {

3113 NewDest = ResultReg;

3114 } else {

3115 NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,

3116 Shift, false, 0);

3117 }

3118 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)

3119 .addReg(TmpResultReg);

3120 ResultReg = NewDest;

3121 }

3122 } else {

3124 if (!IsSALU) {

3125 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=

3126 nullptr) {

3127

3128 Register ScaledReg = ResultReg;

3129

3130 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),

3131 ScaledReg)

3132 .addImm(ST.getWavefrontSizeLog2())

3134

3135 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;

3136

3137

3138 if (IsVOP2 ||

3140

3143 if (!IsVOP2)

3144 MIB.addImm(0);

3145 } else {

3147 "Need to reuse carry out register");

3148

3149

3151 if (!isWave32)

3152 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);

3153 else

3154 ConstOffsetReg = MIB.getReg(1);

3155

3157 ConstOffsetReg)

3161 MIB.addImm(0);

3162 }

3163 }

3164 }

3165 if (!MIB || IsSALU) {

3166

3167

3168

3169

3170

3171

3172 Register TmpScaledReg = IsCopy && IsSALU

3173 ? ResultReg

3174 : RS->scavengeRegisterBackwards(

3175 AMDGPU::SReg_32_XM0RegClass, MI,

3176 false, 0, false);

3177 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;

3178 Register TmpResultReg = ScaledReg;

3179

3180 if (!LiveSCC) {

3181 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)

3183 .addImm(ST.getWavefrontSizeLog2());

3184 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)

3187 } else {

3188 TmpResultReg = RS->scavengeRegisterBackwards(

3189 AMDGPU::VGPR_32RegClass, MI, false, 0, true);

3190

3192 if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {

3194 TmpResultReg)

3195 .addImm(ST.getWavefrontSizeLog2())

3197 if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {

3202 .addImm(0);

3203 } else

3205 } else {

3207 "offset is unsafe for v_mad_u32_u24");

3208

3209

3210

3211

3212

3213

3214

3215

3216 bool IsInlinableLiteral =

3218 if (!IsInlinableLiteral) {

3220 TmpResultReg)

3222 }

3223

3225 TmpResultReg);

3226

3227 if (!IsInlinableLiteral) {

3229 } else {

3230

3232 }

3233 Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);

3235 TmpResultReg)

3236 .addImm(ST.getWavefrontSizeLog2())

3237 .addReg(TmpResultReg);

3238 }

3239

3241 if (IsCopy) {

3242 NewDest = ResultReg;

3243 } else {

3244 NewDest = RS->scavengeRegisterBackwards(

3245 AMDGPU::SReg_32_XM0RegClass, *Add, false, 0,

3246 true);

3247 }

3248

3250 NewDest)

3251 .addReg(TmpResultReg);

3252 ResultReg = NewDest;

3253 }

3254 if (!IsSALU)

3257 else

3258 ResultReg = TmpResultReg;

3259

3260 if (!TmpScaledReg.isValid()) {

3266 .addImm(ST.getWavefrontSizeLog2());

3267 }

3268 }

3269 }

3270

3271

3272 if (IsCopy) {

3273 MI->eraseFromParent();

3274 return true;

3275 }

3277 return false;

3278 }

3279

3280 if (IsMUBUF) {

3281

3283 static_cast<int>(FIOperandNum) ==

3284 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));

3285

3286 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);

3287 assert((SOffset.isImm() && SOffset.getImm() == 0));

3288

3289 if (FrameReg != AMDGPU::NoRegister)

3290 SOffset.ChangeToRegister(FrameReg, false);

3291

3292 int64_t Offset = FrameInfo.getObjectOffset(Index);

3293 int64_t OldImm =

3294 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();

3295 int64_t NewOffset = OldImm + Offset;

3296

3297 if (TII->isLegalMUBUFImmOffset(NewOffset) &&

3299 MI->eraseFromParent();

3300 return true;

3301 }

3302 }

3303

3304

3305

3306

3308 if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {

3310 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);

3314 }

3315

3316 return false;

3317}

3318
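// --- Editor's illustrative sketch (not part of the SIRegisterInfo.cpp listing) ---
// The frame-index elimination above repeatedly rescales the wave-granular frame
// pointer with S_LSHR_B32 / V_LSHRREV_B32 by ST.getWavefrontSizeLog2() before
// folding in a per-object byte offset. A minimal standalone model of that
// arithmetic, using a hypothetical helper and example wave sizes (an assumption,
// not LLVM API):

#include <cassert>
#include <cstdint>

// Divide a per-wave (swizzled) scratch byte offset by the wavefront size, which
// is the effect of the right-shift by getWavefrontSizeLog2() emitted above.
static uint32_t perLaneScratchOffset(uint32_t PerWaveBytes, unsigned WaveSizeLog2) {
  return PerWaveBytes >> WaveSizeLog2;
}

int main() {
  assert(perLaneScratchOffset(256, /*wave64*/ 6) == 4);
  assert(perLaneScratchOffset(256, /*wave32*/ 5) == 8);
  return 0;
}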

3322

3326

3330

3334 return &AMDGPU::VReg_64RegClass;

3336 return &AMDGPU::VReg_96RegClass;

3338 return &AMDGPU::VReg_128RegClass;

3340 return &AMDGPU::VReg_160RegClass;

3342 return &AMDGPU::VReg_192RegClass;

3344 return &AMDGPU::VReg_224RegClass;

3346 return &AMDGPU::VReg_256RegClass;

3348 return &AMDGPU::VReg_288RegClass;

3350 return &AMDGPU::VReg_320RegClass;

3352 return &AMDGPU::VReg_352RegClass;

3354 return &AMDGPU::VReg_384RegClass;

3356 return &AMDGPU::VReg_512RegClass;

3358 return &AMDGPU::VReg_1024RegClass;

3359

3360 return nullptr;

3361}

3362
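// --- Editor's illustrative sketch (not part of the SIRegisterInfo.cpp listing) ---
// getAnyVGPRClassForBitWidth() and the sibling helpers below pick a register
// class purely from a value's bit width. The guard expressions did not survive
// extraction here, so the bucketing below is an assumed, simplified model that
// returns class names as strings instead of TargetRegisterClass pointers:

#include <cassert>
#include <string>

// Return the name of the smallest VReg_<N> bucket (among the widths visible in
// the listing) that can hold a value of Bits bits; widths of 32 bits or less are
// handled by a separate helper in the real code, so return "" for those here.
static std::string vgprClassNameForBitWidth(unsigned Bits) {
  static const unsigned Widths[] = {64,  96,  128, 160, 192, 224, 256,
                                    288, 320, 352, 384, 512, 1024};
  if (Bits <= 32 || Bits > 1024)
    return "";
  for (unsigned W : Widths)
    if (Bits <= W)
      return "VReg_" + std::to_string(W);
  return "";
}

int main() {
  assert(vgprClassNameForBitWidth(65) == "VReg_96");
  assert(vgprClassNameForBitWidth(1024) == "VReg_1024");
  return 0;
}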

3366 return &AMDGPU::VReg_64_Align2RegClass;

3368 return &AMDGPU::VReg_96_Align2RegClass;

3370 return &AMDGPU::VReg_128_Align2RegClass;

3372 return &AMDGPU::VReg_160_Align2RegClass;

3374 return &AMDGPU::VReg_192_Align2RegClass;

3376 return &AMDGPU::VReg_224_Align2RegClass;

3378 return &AMDGPU::VReg_256_Align2RegClass;

3380 return &AMDGPU::VReg_288_Align2RegClass;

3382 return &AMDGPU::VReg_320_Align2RegClass;

3384 return &AMDGPU::VReg_352_Align2RegClass;

3386 return &AMDGPU::VReg_384_Align2RegClass;

3388 return &AMDGPU::VReg_512_Align2RegClass;

3390 return &AMDGPU::VReg_1024_Align2RegClass;

3391

3392 return nullptr;

3393}

3394

3398 return &AMDGPU::VReg_1RegClass;

3400 return &AMDGPU::VGPR_16RegClass;

3402 return &AMDGPU::VGPR_32RegClass;

3405}

3406

3410 return &AMDGPU::VGPR_32_Lo256RegClass;

3412 return &AMDGPU::VReg_64_Lo256_Align2RegClass;

3414 return &AMDGPU::VReg_96_Lo256_Align2RegClass;

3416 return &AMDGPU::VReg_128_Lo256_Align2RegClass;

3418 return &AMDGPU::VReg_160_Lo256_Align2RegClass;

3420 return &AMDGPU::VReg_192_Lo256_Align2RegClass;

3422 return &AMDGPU::VReg_224_Lo256_Align2RegClass;

3424 return &AMDGPU::VReg_256_Lo256_Align2RegClass;

3426 return &AMDGPU::VReg_288_Lo256_Align2RegClass;

3428 return &AMDGPU::VReg_320_Lo256_Align2RegClass;

3430 return &AMDGPU::VReg_352_Lo256_Align2RegClass;

3432 return &AMDGPU::VReg_384_Lo256_Align2RegClass;

3434 return &AMDGPU::VReg_512_Lo256_Align2RegClass;

3436 return &AMDGPU::VReg_1024_Lo256_Align2RegClass;

3437

3438 return nullptr;

3439}

3440

3444 return &AMDGPU::AReg_64RegClass;

3446 return &AMDGPU::AReg_96RegClass;

3448 return &AMDGPU::AReg_128RegClass;

3450 return &AMDGPU::AReg_160RegClass;

3452 return &AMDGPU::AReg_192RegClass;

3454 return &AMDGPU::AReg_224RegClass;

3456 return &AMDGPU::AReg_256RegClass;

3458 return &AMDGPU::AReg_288RegClass;

3460 return &AMDGPU::AReg_320RegClass;

3462 return &AMDGPU::AReg_352RegClass;

3464 return &AMDGPU::AReg_384RegClass;

3466 return &AMDGPU::AReg_512RegClass;

3468 return &AMDGPU::AReg_1024RegClass;

3469

3470 return nullptr;

3471}

3472

3476 return &AMDGPU::AReg_64_Align2RegClass;

3478 return &AMDGPU::AReg_96_Align2RegClass;

3480 return &AMDGPU::AReg_128_Align2RegClass;

3482 return &AMDGPU::AReg_160_Align2RegClass;

3484 return &AMDGPU::AReg_192_Align2RegClass;

3486 return &AMDGPU::AReg_224_Align2RegClass;

3488 return &AMDGPU::AReg_256_Align2RegClass;

3490 return &AMDGPU::AReg_288_Align2RegClass;

3492 return &AMDGPU::AReg_320_Align2RegClass;

3494 return &AMDGPU::AReg_352_Align2RegClass;

3496 return &AMDGPU::AReg_384_Align2RegClass;

3498 return &AMDGPU::AReg_512_Align2RegClass;

3500 return &AMDGPU::AReg_1024_Align2RegClass;

3501

3502 return nullptr;

3503}

3504

3508 return &AMDGPU::AGPR_LO16RegClass;

3510 return &AMDGPU::AGPR_32RegClass;

3513}

3514

3518 return &AMDGPU::AV_64RegClass;

3520 return &AMDGPU::AV_96RegClass;

3522 return &AMDGPU::AV_128RegClass;

3524 return &AMDGPU::AV_160RegClass;

3526 return &AMDGPU::AV_192RegClass;

3528 return &AMDGPU::AV_224RegClass;

3530 return &AMDGPU::AV_256RegClass;

3532 return &AMDGPU::AV_288RegClass;

3534 return &AMDGPU::AV_320RegClass;

3536 return &AMDGPU::AV_352RegClass;

3538 return &AMDGPU::AV_384RegClass;

3540 return &AMDGPU::AV_512RegClass;

3542 return &AMDGPU::AV_1024RegClass;

3543

3544 return nullptr;

3545}

3546

3550 return &AMDGPU::AV_64_Align2RegClass;

3552 return &AMDGPU::AV_96_Align2RegClass;

3554 return &AMDGPU::AV_128_Align2RegClass;

3556 return &AMDGPU::AV_160_Align2RegClass;

3558 return &AMDGPU::AV_192_Align2RegClass;

3560 return &AMDGPU::AV_224_Align2RegClass;

3562 return &AMDGPU::AV_256_Align2RegClass;

3564 return &AMDGPU::AV_288_Align2RegClass;

3566 return &AMDGPU::AV_320_Align2RegClass;

3568 return &AMDGPU::AV_352_Align2RegClass;

3570 return &AMDGPU::AV_384_Align2RegClass;

3572 return &AMDGPU::AV_512_Align2RegClass;

3574 return &AMDGPU::AV_1024_Align2RegClass;

3575

3576 return nullptr;

3577}

3578

3582 return &AMDGPU::AV_32RegClass;

3583 return ST.needsAlignedVGPRs()

3586}

3587

3590

3591

3592

3593

3594

3597}

3598

3602 return &AMDGPU::SReg_32RegClass;

3604 return &AMDGPU::SReg_64RegClass;

3606 return &AMDGPU::SGPR_96RegClass;

3608 return &AMDGPU::SGPR_128RegClass;

3610 return &AMDGPU::SGPR_160RegClass;

3612 return &AMDGPU::SGPR_192RegClass;

3614 return &AMDGPU::SGPR_224RegClass;

3616 return &AMDGPU::SGPR_256RegClass;

3618 return &AMDGPU::SGPR_288RegClass;

3620 return &AMDGPU::SGPR_320RegClass;

3622 return &AMDGPU::SGPR_352RegClass;

3624 return &AMDGPU::SGPR_384RegClass;

3626 return &AMDGPU::SGPR_512RegClass;

3628 return &AMDGPU::SGPR_1024RegClass;

3629

3630 return nullptr;

3631}

3632

3636 if (Reg.isVirtual())

3637 RC = MRI.getRegClass(Reg);

3638 else

3639 RC = getPhysRegBaseClass(Reg);

3641}

3642

3645 unsigned Size = getRegSizeInBits(*SRC);

3646

3647 switch (SRC->getID()) {

3648 default:

3649 break;

3650 case AMDGPU::VS_32_Lo256RegClassID:

3651 case AMDGPU::VS_64_Lo256RegClassID:

3653 }

3654

3657 assert(VRC && "Invalid register class size");

3658 return VRC;

3659}

3660

3663 unsigned Size = getRegSizeInBits(*SRC);

3665 assert(ARC && "Invalid register class size");

3666 return ARC;

3667}

3668

3671 unsigned Size = getRegSizeInBits(*SRC);

3673 assert(ARC && "Invalid register class size");

3674 return ARC;

3675}

3676

3679 unsigned Size = getRegSizeInBits(*VRC);

3680 if (Size == 32)

3681 return &AMDGPU::SGPR_32RegClass;

3683 assert(SRC && "Invalid register class size");

3684 return SRC;

3685}

3686

3690 unsigned SubIdx) const {

3691

3693 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);

3694 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;

3695}

3696

3700 return !ST.hasMFMAInlineLiteralBug();

3701

3704}

3705

3711

3712

3713

3714

3715

3718 const MachineFunction &MF, bool ReserveHighestRegister) const {

3719 if (ReserveHighestRegister) {

3721 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))

3722 return Reg;

3723 } else {

3725 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))

3726 return Reg;

3727 }

3729}

3730

3734 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());

3735 if (!RB)

3736 return false;

3737

3739}

3740

3742 unsigned EltSize) const {

3744 assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);

3745

3746 const unsigned RegHalves = RegBitWidth / 16;

3747 const unsigned EltHalves = EltSize / 2;

3748 assert(RegSplitParts.size() + 1 >= EltHalves);

3749

3750 const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];

3751 const unsigned NumParts = RegHalves / EltHalves;

3752

3753 return ArrayRef(Parts.data(), NumParts);

3754}

3755
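// --- Editor's illustrative sketch (not part of the SIRegisterInfo.cpp listing) ---
// getRegSplitParts() above works in 16-bit "halves": a register of RegBitWidth
// bits spans RegBitWidth/16 halves, an element of EltSize bytes spans EltSize/2
// halves, and the register therefore splits into (RegBitWidth/16) / (EltSize/2)
// parts. A standalone check of that arithmetic with example sizes (hypothetical
// helper, not LLVM API):

#include <cassert>

static unsigned numSplitParts(unsigned RegBitWidth, unsigned EltSizeBytes) {
  const unsigned RegHalves = RegBitWidth / 16;
  const unsigned EltHalves = EltSizeBytes / 2;
  return RegHalves / EltHalves;
}

int main() {
  assert(numSplitParts(128, 4) == 4); // 128-bit register as four 32-bit pieces
  assert(numSplitParts(256, 8) == 4); // 256-bit register as four 64-bit pieces
  return 0;
}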

3759 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);

3760}

3761

3766 return getSubRegisterClass(SrcRC, MO.getSubReg());

3767}

3768

3775

3783

3786 unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;

3787 switch (RC->getID()) {

3788 default:

3789 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);

3790 case AMDGPU::VGPR_32RegClassID:

3791 return std::min(

3792 ST.getMaxNumVGPRs(

3793 MinOcc,

3795 ST.getMaxNumVGPRs(MF));

3796 case AMDGPU::SGPR_32RegClassID:

3797 case AMDGPU::SGPR_LO16RegClassID:

3798 return std::min(ST.getMaxNumSGPRs(MinOcc, true), ST.getMaxNumSGPRs(MF));

3799 }

3800}

3801
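// --- Editor's illustrative sketch (not part of the SIRegisterInfo.cpp listing) ---
// getRegPressureLimit() above clamps the pressure limit to the smaller of two
// bounds: the register budget at the function's minimum achievable occupancy and
// the function-wide maximum. A toy model with made-up numbers (hypothetical
// values, not queried from a real GCNSubtarget):

#include <algorithm>
#include <cassert>

static unsigned pressureLimit(unsigned MaxRegsAtMinOccupancy,
                              unsigned MaxRegsForFunction) {
  return std::min(MaxRegsAtMinOccupancy, MaxRegsForFunction);
}

int main() {
  // E.g. 128 VGPRs are allowed at the occupancy floor, but the function itself
  // is capped at 96, so the reported pressure limit is 96.
  assert(pressureLimit(128, 96) == 96);
  return 0;
}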

3803 unsigned Idx) const {

3804 switch (static_castAMDGPU::RegisterPressureSets\(Idx)) {

3805 case AMDGPU::RegisterPressureSets::VGPR_32:

3806 case AMDGPU::RegisterPressureSets::AGPR_32:

3809 case AMDGPU::RegisterPressureSets::SReg_32:

3812 }

3813

3815}

3816

3818 static const int Empty[] = { -1 };

3819

3820 if (RegPressureIgnoredUnits[static_cast<unsigned>(RegUnit)])

3822

3823 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);

3824}

3825

3832

3835

3836 std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg);

3837

3838 switch (Hint.first) {

3840 Register Paired = Hint.second;

3844 PairedPhys =

3845 getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);

3846 } else if (VRM && VRM->hasPhys(Paired)) {

3847 PairedPhys = getMatchingSuperReg(VRM->getPhys(Paired), AMDGPU::lo16,

3848 &AMDGPU::VGPR_32RegClass);

3849 }

3850

3851

3852 if (PairedPhys)

3853

3854

3856 return false;

3857 }

3859 Register Paired = Hint.second;

3863 PairedPhys = TRI->getSubReg(Paired, AMDGPU::lo16);

3864 } else if (VRM && VRM->hasPhys(Paired)) {

3865 PairedPhys = TRI->getSubReg(VRM->getPhys(Paired), AMDGPU::lo16);

3866 }

3867

3868

3869 if (PairedPhys)

3871 else {

3872

3873

3874

3875

3876

3877 for (MCPhysReg PhysReg : Order) {

3879 continue;

3880 if (AMDGPU::VGPR_16RegClass.contains(PhysReg) &&

3881 MRI.isReserved(PhysReg))

3883 }

3884 }

3885 return false;

3886 }

3887 default:

3889 VRM);

3890 }

3891}

3892

3894

3895 return AMDGPU::SGPR30_SGPR31;

3896}

3897

3901 switch (RB.getID()) {

3902 case AMDGPU::VGPRRegBankID:

3904 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));

3905 case AMDGPU::VCCRegBankID:

3908 case AMDGPU::SGPRRegBankID:

3910 case AMDGPU::AGPRRegBankID:

3912 default:

3914 }

3915}

3916

3923

3925 return getAllocatableClass(RC);

3926

3927 return nullptr;

3928}

3929

3931 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;

3932}

3933

3935 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

3936}

3937

3939

3940 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass

3941 : &AMDGPU::VReg_64RegClass;

3942}

3943

3944

3952

3953 if (Reg.isVirtual()) {

3955 return nullptr;

3958 : MRI.getMaxLaneMaskForVReg(Reg);

3959 VNInfo *V = nullptr;

3961 for (auto &S : LI.subranges()) {

3962 if ((S.LaneMask & SubLanes) == SubLanes) {

3963 V = S.getVNInfoAt(UseIdx);

3964 break;

3965 }

3966 }

3967 } else {

3969 }

3970 if (!V)

3971 return nullptr;

3972 DefIdx = V->def;

3973 } else {

3974

3975 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {

3978 if (!DefIdx.isValid() ||

3981 DefIdx = V->def;

3982 } else {

3983 return nullptr;

3984 }

3985 }

3986 }

3987

3989

3990 if (!Def || !MDT.dominates(Def, &Use))

3991 return nullptr;

3992

3993 assert(Def->modifiesRegister(Reg, this));

3994

3995 return Def;

3996}

3997

3999 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);

4000

4002 AMDGPU::SReg_32RegClass,

4003 AMDGPU::AGPR_32RegClass } ) {

4004 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))

4005 return Super;

4006 }

4007 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,

4008 &AMDGPU::VGPR_32RegClass)) {

4009 return Super;

4010 }

4011

4012 return AMDGPU::NoRegister;

4013}

4014

4016 if (!ST.needsAlignedVGPRs())

4017 return true;

4018

4026

4027 assert(&RC != &AMDGPU::VS_64RegClass);

4028

4029 return true;

4030}

4031

4034 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);

4035}

4036

4039 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);

4040}

4041

4044 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));

4045}

4046

4047unsigned

4049 unsigned SubReg) const {

4052 return std::min(128u, getSubRegIdxSize(SubReg));

4056 return std::min(32u, getSubRegIdxSize(SubReg));

4057 default:

4058 break;

4059 }

4060 return 0;

4061}

4062

4065 bool IncludeCalls) const {

4066 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;

4068 (RC.getID() == AMDGPU::VGPR_32RegClassID)

4069 ? RC.getRegisters().take_front(NumArchVGPRs)

4072 if (MRI.isPhysRegUsed(Reg, !IncludeCalls))

4074 return 0;

4075}

4076

4084 return RegFlags;

4085}
