LLVM: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

72

85#include "llvm/IR/IntrinsicsAMDGPU.h"

86

87#define GET_TARGET_REGBANK_IMPL

88#include "AMDGPUGenRegisterBank.inc"

89

90

91#include "AMDGPUGenRegisterBankInfo.def"

92

93using namespace llvm;

95

96namespace {

97

98

100private:

106

107public:

110 : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {

111 assert(B.isObservingChanges());

112 B.setChangeObserver(*this);

113 }

114

115 ~ApplyRegBankMapping() override {

117 applyBank(*MI);

118

119 B.stopObservingChanges();

120 }

121

122

124 const unsigned Opc = MI.getOpcode();

125 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||

126 Opc == AMDGPU::G_SEXT) {

127

128

129

130 Register DstReg = MI.getOperand(0).getReg();

131 Register SrcReg = MI.getOperand(1).getReg();

133 if (SrcBank == &AMDGPU::VCCRegBank) {

137 assert(NewBank == &AMDGPU::VGPRRegBank);

138

139

140

141 B.setInsertPt(*MI.getParent(), MI);

142

143 auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);

144 auto False = B.buildConstant(S32, 0);

145 B.buildSelect(DstReg, SrcReg, True, False);

146 MRI.setRegBank(True.getReg(0), *NewBank);

147 MRI.setRegBank(False.getReg(0), *NewBank);

148 MI.eraseFromParent();

149 }

150

151 assert(MRI.getRegClassOrRegBank(DstReg));

152 MRI.setRegBank(DstReg, *NewBank);

153 return;

154 }

155

156#ifndef NDEBUG

157 if (Opc == AMDGPU::G_TRUNC) {

158 Register DstReg = MI.getOperand(0).getReg();

160 assert(DstBank != &AMDGPU::VCCRegBank);

161 }

162#endif

163

165 if (Op.isReg())

166 continue;

167

168

170 if (Reg.isPhysical() || MRI.getRegClassOrRegBank(Reg))

171 continue;

172

175 assert(NewBank == &AMDGPU::VGPRRegBank &&

176 "s1 operands should only be used for vector bools");

177 assert((MI.getOpcode() != AMDGPU::G_TRUNC &&

178 MI.getOpcode() != AMDGPU::G_ANYEXT) &&

179 "not expecting legalization artifacts here");

180 RB = &AMDGPU::VCCRegBank;

181 }

182

183 MRI.setRegBank(Reg, *RB);

184 }

185 }

186

188

190

192 }

193

196

197

198

199

200 }

201};

202

203}

204

208

209

211

212 static auto InitializeRegisterBankOnce = [this]() {

213 assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&

214 &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&

215 &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);

216 (void)this;

217 };

218

219 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);

220}

221

223 unsigned BankID = Bank.getID();

224 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;

225}

226

228 return RB != &AMDGPU::SGPRRegBank;

229}

230

234

235 if (Dst.getID() == AMDGPU::SGPRRegBankID &&

237 return std::numeric_limits::max();

238 }

239

240

241

242

243

244

245

246

247 if (Size == 1 &&

248 (Dst.getID() == AMDGPU::SGPRRegBankID) &&

250 Src.getID() == AMDGPU::SGPRRegBankID ||

251 Src.getID() == AMDGPU::VCCRegBankID))

252 return std::numeric_limits::max();

253

254

255 if (Dst.getID() == AMDGPU::AGPRRegBankID &&

256 Src.getID() == AMDGPU::AGPRRegBankID)

257 return 4;

258

260}

261

265

266

267

269 return 10;

270

277

278

279

280

281

282

283

284 return 1;

285}

286

289 LLT Ty) const {

290

291

292 if (TRI->isSGPRClass(&RC)) {

293

294

295

296 if (!Ty.isValid())

297 return AMDGPU::SGPRRegBank;

298

299 return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;

300 }

301

302 return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;

303}

304

305template

309 const std::array<unsigned, NumOps> RegSrcOpIdx,

311

313

315

317 for (unsigned I = 0; I < NumOps; ++I) {

318 Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();

320 }

321

322 for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {

324 Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);

325 }

326

327

328 unsigned MappingID = 2;

329 for (const auto &Entry : Table) {

330 for (unsigned I = 0; I < NumOps; ++I) {

331 int OpIdx = RegSrcOpIdx[I];

332 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);

333 }

334

337 Operands.size()));

338 }

339

340 return AltMappings;

341}

342

347 case Intrinsic::amdgcn_readlane: {

349

350 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

351

352

353 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }

354 };

355

356 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };

358 }

359 case Intrinsic::amdgcn_writelane: {

361

362 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

363

364

365 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

366

367

368 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

369

370

371 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }

372 };

373

374

375 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };

377 }

378 default:

380 }

381}

382

386

388 case Intrinsic::amdgcn_s_buffer_load: {

390

391 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

392

393

394 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

395

396

397 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

398

399

400 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }

401 };

402

403

404 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };

406 }

407 case Intrinsic::amdgcn_ds_ordered_add:

408 case Intrinsic::amdgcn_ds_ordered_swap: {

409

411

412 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

413

414

415 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }

416 };

417

418 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };

420 }

421 case Intrinsic::amdgcn_s_sendmsg:

422 case Intrinsic::amdgcn_s_sendmsghalt: {

423

425

426 { { AMDGPU::SGPRRegBankID }, 1 },

427

428

429 { { AMDGPU::VGPRRegBankID }, 3 }

430 };

431

432 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };

434 }

435 default:

437 }

438}

439

440

441

443 if (MI.hasOneMemOperand())

444 return false;

445

451

452

454 (Subtarget.hasScalarSubwordLoads() &&

455 ((MemSize == 16 && MMO->getAlign() >= Align(2)) ||

456 (MemSize == 8 && MMO->getAlign() >= Align(1))))) &&

457

459

460

462

465}

466

470

473

474

476 switch (MI.getOpcode()) {

477 case TargetOpcode::G_CONSTANT:

478 case TargetOpcode::G_IMPLICIT_DEF: {

480 if (Size == 1) {

482 { { AMDGPU::VGPRRegBankID }, 1 },

483 { { AMDGPU::SGPRRegBankID }, 1 },

484 { { AMDGPU::VCCRegBankID }, 1 }

485 };

486

488 }

489

490 [[fallthrough]];

491 }

492 case TargetOpcode::G_FCONSTANT:

493 case TargetOpcode::G_FRAME_INDEX:

494 case TargetOpcode::G_GLOBAL_VALUE: {

496 { { AMDGPU::VGPRRegBankID }, 1 },

497 { { AMDGPU::SGPRRegBankID }, 1 }

498 };

499

501 }

502 case TargetOpcode::G_AND:

503 case TargetOpcode::G_OR:

504 case TargetOpcode::G_XOR: {

506

507 if (Size == 1) {

508

511 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),

512 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),

513 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),

514 3);

515 AltMappings.push_back(&SCCMapping);

516

519 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),

520 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),

521 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),

522 3);

523 AltMappings.push_back(&VCCMapping0);

524 return AltMappings;

525 }

526

527 if (Size != 64)

528 break;

529

532 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

533 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

534 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),

535 3);

536 AltMappings.push_back(&SSMapping);

537

540 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),

541 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),

542 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),

543 3);

544 AltMappings.push_back(&VVMapping);

545 break;

546 }

547 case TargetOpcode::G_LOAD:

548 case TargetOpcode::G_ZEXTLOAD:

549 case TargetOpcode::G_SEXTLOAD: {

551 LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());

554

560 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

561 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),

562 2);

563 AltMappings.push_back(&SSMapping);

564 }

565

567 2, 1,

569 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),

570 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),

571 2);

572 AltMappings.push_back(&VVMapping);

573

574

575

576

577

578

579

580 return AltMappings;

581

582 }

583 case TargetOpcode::G_SELECT: {

587 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),

588 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),

590 4);

591 AltMappings.push_back(&SSMapping);

592

595 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),

596 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),

597 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),

598 4);

599 AltMappings.push_back(&VVMapping);

600

601 return AltMappings;

602 }

603 case TargetOpcode::G_UADDE:

604 case TargetOpcode::G_USUBE:

605 case TargetOpcode::G_SADDE:

606 case TargetOpcode::G_SSUBE: {

610 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

611 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),

612 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),

614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),

615 5);

616 AltMappings.push_back(&SSMapping);

617

620 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),

621 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),

622 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),

623 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),

624 5);

625 AltMappings.push_back(&VVMapping);

626 return AltMappings;

627 }

628 case AMDGPU::G_BRCOND: {

629 assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

630

631

634 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr}),

635 2);

636 AltMappings.push_back(&SMapping);

637

640 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),

641 2);

642 AltMappings.push_back(&VMapping);

643 return AltMappings;

644 }

645 case AMDGPU::G_INTRINSIC:

646 case AMDGPU::G_INTRINSIC_CONVERGENT:

648 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:

649 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:

651 default:

652 break;

653 }

655}

656

660 LLT HalfTy,

664 Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);

665 Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);

667 MRI->setRegBank(LoLHS, *Bank);

668 MRI->setRegBank(HiLHS, *Bank);

669

672

673 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)

674 .addDef(LoLHS)

675 .addDef(HiLHS)

676 .addUse(Reg);

677}

678

679

681 LLT NewTy) {

684 MRI.setType(Reg, NewTy);

685 }

686}

687

689 if (Ty.isVector()) {

690 assert(Ty.getElementCount().isKnownMultipleOf(2));

692 Ty.getElementType());

693 }

694

695 assert(Ty.getScalarSizeInBits() % 2 == 0);

696 return LLT::scalar(Ty.getScalarSizeInBits() / 2);

697}

698

699

700

704 LLT Ty = MRI.getType(Src);

706

707 if (Bank == &AMDGPU::SGPRRegBank)

708 return Src;

709

710 unsigned Bits = Ty.getSizeInBits();

711 assert(Bits % 32 == 0);

712

713 if (Bank != &AMDGPU::VGPRRegBank) {

714

715 Src = B.buildCopy(Ty, Src).getReg(0);

716 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);

717 }

718

720 unsigned NumParts = Bits / 32;

723

724 if (Bits == 32) {

726 } else {

727 auto Unmerge = B.buildUnmerge(S32, Src);

728 for (unsigned i = 0; i < NumParts; ++i)

729 SrcParts.push_back(Unmerge.getReg(i));

730 }

731

732 for (unsigned i = 0; i < NumParts; ++i) {

733 Register SrcPart = SrcParts[i];

734 Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

735 MRI.setType(DstPart, NumParts == 1 ? Ty : S32);

736

739 (void)Constrained;

740 assert(Constrained && "Failed to constrain readfirstlane src reg");

741

742 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});

743

745 }

746

747 if (Bits == 32)

748 return DstParts[0];

749

750 Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);

751 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);

752 return Dst;

753}

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

776

777

779

782

786

787#ifndef NDEBUG

788 const int OrigRangeSize = std::distance(Range.begin(), Range.end());

789#endif

790

792 Register SaveExecReg = MRI.createVirtualRegister(WaveRC);

793 Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

794

795

796 B.buildInstr(TargetOpcode::IMPLICIT_DEF)

797 .addDef(InitSaveExecReg);

798

799 Register PhiExec = MRI.createVirtualRegister(WaveRC);

800 Register NewExec = MRI.createVirtualRegister(WaveRC);

801

802

803

814

818

819

822

823 MBB.addSuccessor(LoopBB);

825

826 B.setInsertPt(*LoopBB, LoopBB->end());

827

828 B.buildInstr(TargetOpcode::PHI)

829 .addDef(PhiExec)

830 .addReg(InitSaveExecReg)

831 .addMBB(&MBB)

832 .addReg(NewExec)

833 .addMBB(BodyBB);

834

836

838

839

840

842

843

845 auto NewEnd = BodyBB->end();

846

847 B.setMBB(*LoopBB);

848

851

852 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);

853

857 if (!SGPROperandRegs.count(OldReg))

858 continue;

859

860

861

862 auto OldVal = WaterfalledRegMap.find(OldReg);

863 if (OldVal != WaterfalledRegMap.end()) {

864 Op.setReg(OldVal->second);

865 continue;

866 }

867

869 LLT OpTy = MRI.getType(OpReg);

870

872 if (OpBank != &AMDGPU::VGPRRegBank) {

873

874 B.setMBB(MBB);

875 OpReg = B.buildCopy(OpTy, OpReg).getReg(0);

876 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);

877 B.setMBB(*LoopBB);

878 }

879

881

882

884 bool Is64 = OpSize % 64 == 0;

885 unsigned PartSize = Is64 ? 64 : 32;

887 unsigned NumParts = OpSize / PartSize;

890

891 if (NumParts == 1) {

893 CurrentLaneParts.push_back(CurrentLaneReg);

894 } else {

895 auto UnmergeOp = B.buildUnmerge(PartTy, OpReg);

896 auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg);

897 for (unsigned i = 0; i < NumParts; ++i) {

898 OpParts.push_back(UnmergeOp.getReg(i));

899 CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i));

900 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);

901 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);

902 }

903 }

904

905 for (unsigned i = 0; i < NumParts; ++i) {

907 OpParts[i]).getReg(0);

908 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);

909

910 if (!CondReg) {

911 CondReg = CmpReg;

912 } else {

913 CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0);

914 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);

915 }

916 }

917

918 Op.setReg(CurrentLaneReg);

919

920

921 WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));

922 }

923 }

924

925

926 CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,

928 .addReg(CondReg)

929 .getReg(0);

930 MRI.setRegClass(CondReg, WaveRC);

931

932

934 .addDef(NewExec)

936

937 MRI.setSimpleHint(NewExec, CondReg);

938

939 B.setInsertPt(*BodyBB, BodyBB->end());

940

941

945 .addReg(NewExec);

946

947

948

949

950

951 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);

952

953

956

957

958 B.setMBB(*RestoreExecBB);

959 B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);

960

961

962

963 B.setInsertPt(*RemainderBB, RemainderBB->begin());

964

965 return true;

966}

967

968

969

970

971

975 for (unsigned Op : OpIndices) {

979 if (OpBank->getID() != AMDGPU::SGPRRegBankID)

980 SGPROperandRegs.insert(Reg);

981 }

982

983

984 return !SGPROperandRegs.empty();

985}

986

989

990

992

994 return false;

995

998 SGPROperandRegs);

999}

1000

1001

1007 if (Bank == &AMDGPU::SGPRRegBank)

1008 return;

1009

1011 MI.getOperand(OpIdx).setReg(Reg);

1012}

1013

1014

1015

1017 unsigned TotalSize = Ty.getSizeInBits();

1018 if (!Ty.isVector())

1020

1023 assert(FirstSize % EltSize == 0);

1024

1025 unsigned FirstPartNumElts = FirstSize / EltSize;

1026 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;

1027

1030}

1031

1033 if (!Ty.isVector())

1035

1039}

1040

1046 Register DstReg = MI.getOperand(0).getReg();

1047 const LLT LoadTy = MRI.getType(DstReg);

1050 const unsigned MaxNonSmrdLoadSize = 128;

1051

1054 if (DstBank == &AMDGPU::SGPRRegBank) {

1055

1056

1057 if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads()))

1058 return false;

1059

1061

1062

1063

1064

1065 if (LoadSize == 32 &&

1067 return false;

1068

1069 if (LoadSize == 32 &&

1070 ((MemSize == 8 && MMO->getAlign() >= Align(1)) ||

1071 (MemSize == 16 && MMO->getAlign() >= Align(2))) &&

1074 return false;

1075

1076 Register PtrReg = MI.getOperand(1).getReg();

1077

1078 ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

1079

1080 if (LoadSize == 32) {

1081

1082

1084 if (MI.getOpcode() == AMDGPU::G_SEXTLOAD) {

1085

1086 auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);

1087 B.buildSExtInReg(MI.getOperand(0), WideLoad, MemSize);

1088 } else if (MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {

1089

1090 auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);

1091 B.buildZExtInReg(MI.getOperand(0), WideLoad, MemSize);

1092 } else

1093

1094 B.buildLoadFromOffset(MI.getOperand(0), PtrReg, *MMO, 0);

1095 } else {

1096

1097

1100 LLT Part64, Part32;

1104 return false;

1105 return true;

1106 }

1108 auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);

1110 B.buildTrunc(MI.getOperand(0), WideLoad);

1111 } else {

1112 B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),

1113 WideLoad);

1114 }

1115 }

1116

1117 MI.eraseFromParent();

1118 return true;

1119 }

1120

1121

1122 if (LoadSize <= MaxNonSmrdLoadSize)

1123 return false;

1124

1126

1127 if (SrcRegs.empty())

1128 SrcRegs.push_back(MI.getOperand(1).getReg());

1129

1130

1131

1132 Register BasePtrReg = SrcRegs[0];

1133 LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());

1134 MRI.setType(BasePtrReg, PtrTy);

1135

1136

1137

1140 assert(LoadSize % MaxNonSmrdLoadSize == 0);

1141 unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;

1142 const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);

1143 ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);

1148 return false;

1149 } else {

1151 return false;

1152 }

1153 }

1154

1155 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

1156 return true;

1157}

1158

1166 const auto &TFI = *ST.getFrameLowering();

1167

1168

1169

1171 "Stack grows upwards for AMDGPU");

1172

1173 Register Dst = MI.getOperand(0).getReg();

1174 Register AllocSize = MI.getOperand(1).getReg();

1176

1178

1179 if (SizeBank != &AMDGPU::SGPRRegBank) {

1180 auto WaveReduction =

1181 B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {LLT::scalar(32)})

1182 .addUse(AllocSize)

1183 .addImm(0);

1184 AllocSize = WaveReduction.getReg(0);

1185 }

1186

1187 LLT PtrTy = MRI.getType(Dst);

1189

1192 ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

1193

1194 auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());

1195 auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);

1196

1197 auto OldSP = B.buildCopy(PtrTy, SPReg);

1198 if (Alignment > TFI.getStackAlign()) {

1199 auto StackAlignMask = (Alignment.value() << ST.getWavefrontSizeLog2()) - 1;

1200 auto Tmp1 = B.buildPtrAdd(PtrTy, OldSP,

1201 B.buildConstant(LLT::scalar(32), StackAlignMask));

1202 B.buildMaskLowPtrBits(Dst, Tmp1,

1203 Log2(Alignment) + ST.getWavefrontSizeLog2());

1204 } else {

1205 B.buildCopy(Dst, OldSP);

1206 }

1207 auto PtrAdd = B.buildPtrAdd(PtrTy, Dst, ScaledSize);

1208 B.buildCopy(SPReg, PtrAdd);

1209 MI.eraseFromParent();

1210 return true;

1211}

1212

1216 int RsrcIdx) const {

1217 const int NumDefs = MI.getNumExplicitDefs();

1218

1219

1220

1221 RsrcIdx += NumDefs + 1;

1222

1223

1225

1226

1228 for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {

1229 if (MI.getOperand(I).isReg())

1230 continue;

1231

1232

1233 if (I == RsrcIdx || I == RsrcIdx + 1)

1235 }

1236

1238 return true;

1239}

1240

1241

1242

1245 Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const {

1248

1249 if (std::optional<int64_t> Imm =

1251 uint32_t SOffset, ImmOffset;

1252 if (TII->splitMUBUFOffset(*Imm, SOffset, ImmOffset, Alignment)) {

1253 VOffsetReg = B.buildConstant(S32, 0).getReg(0);

1254 SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);

1255 InstOffsetVal = ImmOffset;

1256

1257 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);

1258 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);

1259 return SOffset + ImmOffset;

1260 }

1261 }

1262

1265

1268

1269 uint32_t SOffset, ImmOffset;

1270 if ((int)Offset > 0 &&

1271 TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {

1273 VOffsetReg = Base;

1274 SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);

1275 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);

1276 InstOffsetVal = ImmOffset;

1277 return 0;

1278 }

1279

1280

1281 if (SOffset == 0) {

1282 VOffsetReg = B.buildConstant(S32, 0).getReg(0);

1283 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);

1284 SOffsetReg = Base;

1285 InstOffsetVal = ImmOffset;

1286 return 0;

1287 }

1288 }

1289

1290

1295

1298

1299 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {

1300 VOffsetReg = Src0;

1301 SOffsetReg = Src1;

1302 return 0;

1303 }

1304

1305 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {

1306 VOffsetReg = Src1;

1307 SOffsetReg = Src0;

1308 return 0;

1309 }

1310 }

1311

1312

1313

1314 if (getRegBank(CombinedOffset, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {

1315 VOffsetReg = CombinedOffset;

1316 } else {

1317 VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);

1318 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);

1319 }

1320

1321 SOffsetReg = B.buildConstant(S32, 0).getReg(0);

1322 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);

1323 return 0;

1324}

1325

1327 switch (Opc) {

1328 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:

1329 return AMDGPU::G_AMDGPU_BUFFER_LOAD;

1330 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:

1331 return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;

1332 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:

1333 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;

1334 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:

1335 return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;

1336 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:

1337 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;

1338 default:

1339 break;

1340 }

1342}

1343

1348

1350 Register Dst = MI.getOperand(0).getReg();

1351 LLT Ty = MRI.getType(Dst);

1352

1357 if (RSrcBank == &AMDGPU::SGPRRegBank &&

1358 OffsetBank == &AMDGPU::SGPRRegBank)

1359 return true;

1360

1361

1362

1363

1364 unsigned LoadSize = Ty.getSizeInBits();

1365 int NumLoads = 1;

1366 if (LoadSize == 256 || LoadSize == 512) {

1367 NumLoads = LoadSize / 128;

1368 Ty = Ty.divide(NumLoads);

1369 }

1370

1371

1372

1373 const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);

1374

1376

1379 int64_t ImmOffset = 0;

1380

1381 unsigned MMOOffset = setBufferOffsets(B, MI.getOperand(2).getReg(), VOffset,

1382 SOffset, ImmOffset, Alignment);

1383

1384

1385

1386 const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;

1387 const Align MemAlign(4);

1392 MemSize, MemAlign);

1393 if (MMOOffset != 0)

1395

1396

1397

1398

1399 Register RSrc = MI.getOperand(1).getReg();

1400 Register VIndex = B.buildConstant(S32, 0).getReg(0);

1401 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);

1402

1404

1407

1408 for (int i = 0; i < NumLoads; ++i) {

1409 if (NumLoads == 1) {

1410 LoadParts[i] = Dst;

1411 } else {

1412 LoadParts[i] = MRI.createGenericVirtualRegister(Ty);

1413 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);

1414 }

1415

1417 if (i != 0)

1419

1421 .addDef(LoadParts[i])

1422 .addUse(RSrc)

1423 .addUse(VIndex)

1424 .addUse(VOffset)

1425 .addUse(SOffset)

1426 .addImm(ImmOffset + 16 * i)

1427 .addImm(0)

1428 .addImm(0)

1429 .addMemOperand(MMO);

1430 }

1431

1432

1433

1434

1435 if (RSrcBank != &AMDGPU::SGPRRegBank) {

1436

1437

1438 B.setInstr(*Span.begin());

1439 MI.eraseFromParent();

1440

1442

1443 OpsToWaterfall.insert(RSrc);

1445 OpsToWaterfall);

1446 }

1447

1448 if (NumLoads != 1) {

1449 if (Ty.isVector())

1450 B.buildConcatVectors(Dst, LoadParts);

1451 else

1452 B.buildMergeLikeInstr(Dst, LoadParts);

1453 }

1454

1455

1456 if (RSrcBank == &AMDGPU::SGPRRegBank)

1457 MI.eraseFromParent();

1458

1459 return true;

1460}

1461

1464 bool Signed) const {

1467

1468

1470

1471 Register DstReg = MI.getOperand(0).getReg();

1472 LLT Ty = MRI.getType(DstReg);

1473

1475

1477 Register SrcReg = MI.getOperand(FirstOpnd).getReg();

1478 Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();

1479 Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();

1480

1483 if (DstBank == &AMDGPU::VGPRRegBank) {

1484 if (Ty == S32)

1485 return true;

1486

1487

1488

1489 ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

1490

1492

1493 auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)

1494 : B.buildLShr(S64, SrcReg, OffsetReg);

1495 auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);

1496

1497

1498

1500

1501

1502 auto Zero = B.buildConstant(S32, 0);

1503 auto WidthImm = ConstWidth->Value.getZExtValue();

1504 if (WidthImm <= 32) {

1505

1506

1507 auto Extract =

1508 Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)

1509 : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);

1510 auto Extend =

1511 Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;

1512 B.buildMergeLikeInstr(DstReg, {Extract, Extend});

1513 } else {

1514

1515

1516 auto UpperWidth = B.buildConstant(S32, WidthImm - 32);

1517 auto Extract =

1519 ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)

1520 : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);

1521 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});

1522 }

1523 MI.eraseFromParent();

1524 return true;

1525 }

1526

1527

1528

1529 auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);

1530 auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);

1532 B.buildAShr(S64, SignBit, ExtShift);

1533 else

1534 B.buildLShr(S64, SignBit, ExtShift);

1535 MI.eraseFromParent();

1536 return true;

1537 }

1538

1539

1540

1541 ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

1542

1543

1545 auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);

1546

1547

1548 auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));

1549

1550

1551

1552

1553 auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);

1554

1555

1556

1557 unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :

1558 (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);

1559

1560 auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});

1563

1564 MI.eraseFromParent();

1565 return true;

1566}

1567

1572

1573

1575

1576 Register Dst0 = MI.getOperand(0).getReg();

1577 Register Dst1 = MI.getOperand(1).getReg();

1578 Register Src0 = MI.getOperand(2).getReg();

1579 Register Src1 = MI.getOperand(3).getReg();

1580 Register Src2 = MI.getOperand(4).getReg();

1581

1582 if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)

1583 return true;

1584

1585 bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

1588

1589 bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;

1590 bool Accumulate = true;

1591

1592 if (!DstOnValu) {

1594 Accumulate = false;

1595 }

1596

1597

1599 Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);

1600 bool MulHiInVgpr = false;

1601

1602 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);

1603

1605 DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)

1606 : B.buildSMulH(S32, Src0, Src1).getReg(0);

1607 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);

1608 } else {

1609 Register VSrc0 = B.buildCopy(S32, Src0).getReg(0);

1610 Register VSrc1 = B.buildCopy(S32, Src1).getReg(0);

1611

1612 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);

1613 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);

1614

1615 DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)

1616 : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);

1617 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);

1618

1619 if (!DstOnValu) {

1621 } else {

1622 MulHiInVgpr = true;

1623 }

1624 }

1625

1626

1627

1628

1629

1630

1631

1632

1633 LLT CarryType = DstOnValu ? S1 : S32;

1635 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;

1637 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;

1640

1641 if (!IsUnsigned) {

1642 Zero = B.buildConstant(S32, 0).getReg(0);

1643 MRI.setRegBank(Zero,

1644 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);

1645

1647 .getReg(0);

1648 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank

1649 : AMDGPU::SGPRRegBank);

1650

1651 if (DstOnValu && !MulHiInVgpr) {

1652 Carry = B.buildTrunc(S1, Carry).getReg(0);

1653 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);

1654 }

1655 }

1656

1657 if (Accumulate) {

1658 if (DstOnValu) {

1659 DstLo = B.buildCopy(S32, DstLo).getReg(0);

1660 DstHi = B.buildCopy(S32, DstHi).getReg(0);

1661 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);

1662 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);

1663 }

1664

1665 auto Unmerge = B.buildUnmerge(S32, Src2);

1666 Register Src2Lo = Unmerge.getReg(0);

1667 Register Src2Hi = Unmerge.getReg(1);

1668 MRI.setRegBank(Src2Lo, DstBank);

1669 MRI.setRegBank(Src2Hi, DstBank);

1670

1671 if (!IsUnsigned) {

1672 auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero);

1673 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);

1674

1675 Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);

1676 MRI.setRegBank(Carry, CarryBank);

1677 }

1678

1679 auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);

1680 DstLo = AddLo.getReg(0);

1681 Register CarryLo = AddLo.getReg(1);

1682 MRI.setRegBank(DstLo, DstBank);

1683 MRI.setRegBank(CarryLo, CarryBank);

1684

1685 auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);

1686 DstHi = AddHi.getReg(0);

1687 MRI.setRegBank(DstHi, DstBank);

1688

1689 Register CarryHi = AddHi.getReg(1);

1690 MRI.setRegBank(CarryHi, CarryBank);

1691

1692 if (IsUnsigned) {

1693 Carry = CarryHi;

1694 } else {

1695 Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);

1696 MRI.setRegBank(Carry, CarryBank);

1697 }

1698 } else {

1699 if (IsUnsigned) {

1700 Carry = B.buildConstant(CarryType, 0).getReg(0);

1701 MRI.setRegBank(Carry, CarryBank);

1702 }

1703 }

1704

1705 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});

1706

1707 if (DstOnValu) {

1708 B.buildCopy(Dst1, Carry);

1709 } else {

1710 B.buildTrunc(Dst1, Carry);

1711 }

1712

1713 MI.eraseFromParent();

1714 return true;

1715}

1716

1717

1719 switch (Opc) {

1720 case TargetOpcode::G_ASHR:

1721 case TargetOpcode::G_SMIN:

1722 case TargetOpcode::G_SMAX:

1723 return TargetOpcode::G_SEXT;

1724 case TargetOpcode::G_LSHR:

1725 case TargetOpcode::G_UMIN:

1726 case TargetOpcode::G_UMAX:

1727 return TargetOpcode::G_ZEXT;

1728 default:

1729 return TargetOpcode::G_ANYEXT;

1730 }

1731}

1732

1733

1734

1735static std::pair<Register, Register>

1738 auto Bitcast = B.buildBitcast(S32, Src);

1739

1740 if (ExtOpcode == TargetOpcode::G_SEXT) {

1741 auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);

1742 auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));

1743 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));

1744 }

1745

1746 auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));

1747 if (ExtOpcode == TargetOpcode::G_ZEXT) {

1748 auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));

1749 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));

1750 }

1751

1752 assert(ExtOpcode == TargetOpcode::G_ANYEXT);

1753 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));

1754}

1755

1756

1757

1761 if (!SrcReg.empty()) {

1764 return true;

1765 }

1766

1767 return false;

1768}

1769

1770

1774 if (Subtarget.hasUnpackedD16VMem())

1775 return Reg;

1776

1778 LLT StoreVT = MRI.getType(Reg);

1780 return Reg;

1781

1782 auto Unmerge = B.buildUnmerge(S16, Reg);

1783

1784

1786 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)

1787 WideRegs.push_back(Unmerge.getReg(I));

1788

1791

1793 .getReg(0);

1794}

1795

1796static std::pair<Register, unsigned>

1798 int64_t Const;

1800 return std::pair(Register(), Const);

1801

1804 return std::pair(Base, Const);

1805

1806

1807 return std::pair(Reg, 0);

1808}

1809

1810std::pair<Register, unsigned>

1812 Register OrigOffset) const {

1815 unsigned ImmOffset;

1817

1818

1820 OrigOffset);

1821

1822 unsigned C1 = 0;

1823 if (ImmOffset != 0) {

1824

1825

1826

1827

1828

1829

1830

1831

1832 unsigned Overflow = ImmOffset & ~MaxImm;

1833 ImmOffset -= Overflow;

1834 if ((int32_t)Overflow < 0) {

1835 Overflow += ImmOffset;

1836 ImmOffset = 0;

1837 }

1838

1839 C1 = ImmOffset;

1840 if (Overflow != 0) {

1841 if (!BaseReg)

1842 BaseReg = B.buildConstant(S32, Overflow).getReg(0);

1843 else {

1844 auto OverflowVal = B.buildConstant(S32, Overflow);

1845 BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);

1846 }

1847 }

1848 }

1849

1850 if (!BaseReg)

1851 BaseReg = B.buildConstant(S32, 0).getReg(0);

1852

1853 return {BaseReg, C1};

1854}

1855

1859 LLT SrcTy = MRI.getType(SrcReg);

1860 if (SrcTy.getSizeInBits() == 32) {

1861

1862 B.buildInstr(AMDGPU::V_MOV_B32_e32)

1863 .addDef(DstReg)

1864 .addUse(SrcReg);

1867 }

1868

1869 Register TmpReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

1870 Register TmpReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

1871

1872 B.buildInstr(AMDGPU::V_MOV_B32_e32)

1873 .addDef(TmpReg0)

1874 .addUse(SrcReg, 0, AMDGPU::sub0);

1875 B.buildInstr(AMDGPU::V_MOV_B32_e32)

1876 .addDef(TmpReg1)

1877 .addUse(SrcReg, 0, AMDGPU::sub1);

1878 B.buildInstr(AMDGPU::REG_SEQUENCE)

1879 .addDef(DstReg)

1880 .addUse(TmpReg0)

1881 .addImm(AMDGPU::sub0)

1882 .addUse(TmpReg1)

1883 .addImm(AMDGPU::sub1);

1884

1887}

1888

1889

1890

1894 unsigned ConstOffset) {

1899

1900 auto MaterializedOffset = B.buildConstant(S32, ConstOffset);

1901

1902 auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);

1903 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);

1904 MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);

1906}

1907

1908

1909

1910

1911

1914 unsigned ExtOpc,

1916 bool IsBooleanSrc = false) {

1917 if (ExtOpc == AMDGPU::G_ZEXT) {

1918 B.buildConstant(Hi32Reg, 0);

1919 } else if (ExtOpc == AMDGPU::G_SEXT) {

1920 if (IsBooleanSrc) {

1921

1922

1923 B.buildCopy(Hi32Reg, Lo32Reg);

1924 } else {

1925

1926 auto ShiftAmt = B.buildConstant(LLT::scalar(32), 31);

1927 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);

1928 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);

1929 }

1930 } else {

1931 assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");

1932 B.buildUndef(Hi32Reg);

1933 }

1934}

1935

1936bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(

1938 const OperandsMapper &OpdMapper) const {

1939 MachineRegisterInfo &MRI = *B.getMRI();

1940

1941 Register VecReg = MI.getOperand(1).getReg();

1942 Register Idx = MI.getOperand(2).getReg();

1943

1944 const RegisterBank &IdxBank =

1945 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

1946

1947 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

1948

1949 LLT VecTy = MRI.getType(VecReg);

1952

1955 return false;

1956

1958

1959 const RegisterBank &DstBank =

1960 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;

1961 const RegisterBank &SrcBank =

1962 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

1963

1964 const RegisterBank &CCBank =

1965 (DstBank == AMDGPU::SGPRRegBank &&

1966 SrcBank == AMDGPU::SGPRRegBank &&

1967 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank

1968 : AMDGPU::VCCRegBank;

1969 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);

1970

1971 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {

1972 Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();

1973 MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);

1974 }

1975

1978 unsigned NumLanes = DstRegs.size();

1979 if (!NumLanes)

1980 NumLanes = 1;

1981 else

1982 EltTy = MRI.getType(DstRegs[0]);

1983

1984 auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);

1986 for (unsigned L = 0; L < NumLanes; ++L)

1987 Res[L] = UnmergeToEltTy.getReg(L);

1988

1989 for (unsigned I = 1; I < NumElem; ++I) {

1990 auto IC = B.buildConstant(S32, I);

1991 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);

1993 MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

1994

1995 for (unsigned L = 0; L < NumLanes; ++L) {

1996 auto S = B.buildSelect(EltTy, Cmp,

1997 UnmergeToEltTy.getReg(I * NumLanes + L), Res[L]);

1998

1999 for (unsigned N : { 0, 2, 3 })

2000 MRI.setRegBank(S->getOperand(N).getReg(), DstBank);

2001

2002 Res[L] = S->getOperand(0).getReg();

2003 }

2004 }

2005

2006 for (unsigned L = 0; L < NumLanes; ++L) {

2007 Register DstReg = (NumLanes == 1) ? MI.getOperand(0).getReg() : DstRegs[L];

2008 B.buildCopy(DstReg, Res[L]);

2009 MRI.setRegBank(DstReg, DstBank);

2010 }

2011

2012 MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);

2013 MI.eraseFromParent();

2014

2015 return true;

2016}

2017

2018

2019

2024 if (CurrBank && *CurrBank != Bank) {

2026 MRI.setRegBank(Copy, Bank);

2027 return Copy;

2028 }

2029

2030 MRI.setRegBank(Reg, Bank);

2031 return Reg;

2032}

2033

2034bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(

2036 const OperandsMapper &OpdMapper) const {

2037

2038 MachineRegisterInfo &MRI = *B.getMRI();

2039 Register VecReg = MI.getOperand(1).getReg();

2040 Register Idx = MI.getOperand(3).getReg();

2041

2042 const RegisterBank &IdxBank =

2043 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;

2044

2045 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

2046

2047 LLT VecTy = MRI.getType(VecReg);

2050

2053 return false;

2054

2056

2057 const RegisterBank &DstBank =

2058 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;

2059 const RegisterBank &SrcBank =

2060 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

2061 const RegisterBank &InsBank =

2062 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

2063

2064 const RegisterBank &CCBank =

2065 (DstBank == AMDGPU::SGPRRegBank &&

2066 SrcBank == AMDGPU::SGPRRegBank &&

2067 InsBank == AMDGPU::SGPRRegBank &&

2068 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank

2069 : AMDGPU::VCCRegBank;

2070 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);

2071

2072 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {

2073 Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();

2074 MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);

2075 }

2076

2079 unsigned NumLanes = InsRegs.size();

2080 if (!NumLanes) {

2081 NumLanes = 1;

2082 InsRegs.push_back(MI.getOperand(2).getReg());

2083 } else {

2084 EltTy = MRI.getType(InsRegs[0]);

2085 }

2086

2087 auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);

2088 SmallVector<Register, 16> Ops(NumElem * NumLanes);

2089

2090 for (unsigned I = 0; I < NumElem; ++I) {

2091 auto IC = B.buildConstant(S32, I);

2092 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);

2094 MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

2095

2096 for (unsigned L = 0; L < NumLanes; ++L) {

2098 Register Op1 = UnmergeToEltTy.getReg(I * NumLanes + L);

2100

2101 Register Select = B.buildSelect(EltTy, Cmp, Op0, Op1).getReg(0);

2102 MRI.setRegBank(Select, DstBank);

2103

2105 }

2106 }

2107

2109 if (MergeTy == MRI.getType(MI.getOperand(0).getReg())) {

2110 B.buildBuildVector(MI.getOperand(0), Ops);

2111 } else {

2112 auto Vec = B.buildBuildVector(MergeTy, Ops);

2113 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);

2114 B.buildBitcast(MI.getOperand(0).getReg(), Vec);

2115 }

2116

2117 MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);

2118 MI.eraseFromParent();

2119

2120 return true;

2121}

2122

2123

2129

2130

2131 if (DefRegs.empty()) {

2134 return;

2135 }

2136

2139 (Src0Regs.empty() || Src0Regs.size() == 2));

2140

2143 Register DstReg = MI.getOperand(0).getReg();

2145

2146

2147

2148

2149

2150 if (Src0Regs.empty())

2152 else

2154

2155 if (Src1Regs.empty())

2157 else

2159

2161

2162

2163

2164

2165

2166

2167

2168

2169

2170

2171

2172

2173

2174

2175

2176

2177

2178 ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

2179

2180 Register Hi = B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);

2181 Register MulLoHi = B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);

2182 Register Add = B.buildAdd(HalfTy, Hi, MulLoHi).getReg(0);

2183 Register MulHiLo = B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);

2184 B.buildAdd(DefRegs[1], Add, MulHiLo);

2185 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);

2186

2187 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

2188 MI.eraseFromParent();

2189}

2190

2194 B.setInstrAndDebugLoc(MI);

2195 unsigned Opc = MI.getOpcode();

2197 switch (Opc) {

2198 case AMDGPU::G_CONSTANT:

2199 case AMDGPU::G_IMPLICIT_DEF: {

2200 Register DstReg = MI.getOperand(0).getReg();

2201 LLT DstTy = MRI.getType(DstReg);

2203 break;

2204

2207 if (DstBank == &AMDGPU::VCCRegBank)

2208 break;

2210 if (DefRegs.empty())

2212

2213 B.setInsertPt(*MI.getParent(), ++MI.getIterator());

2214

2216 LLVMContext &Ctx = B.getMF().getFunction().getContext();

2217

2218 MI.getOperand(0).setReg(NewDstReg);

2219 if (Opc != AMDGPU::G_IMPLICIT_DEF) {

2220 uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();

2221 MI.getOperand(1).setCImm(

2223 }

2224

2225 MRI.setRegBank(NewDstReg, *DstBank);

2226 B.buildTrunc(DefRegs[0], NewDstReg);

2227 return;

2228 }

2229 case AMDGPU::G_PHI: {

2230 Register DstReg = MI.getOperand(0).getReg();

2231 LLT DstTy = MRI.getType(DstReg);

2233 break;

2234

2238 if (DstBank == &AMDGPU::VCCRegBank) {

2240

2241

2242

2243

2244

2245 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {

2246 Register SrcReg = MI.getOperand(I).getReg();

2248

2249 if (SrcBank != &AMDGPU::VCCRegBank) {

2252

2253 auto Copy = B.buildCopy(LLT::scalar(1), SrcReg);

2254 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);

2255 MI.getOperand(I).setReg(Copy.getReg(0));

2256 }

2257 }

2258

2259 return;

2260 }

2261

2262

2264

2265

2266 ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

2267 B.setInsertPt(B.getMBB(), MI);

2269

2272

2273 return;

2274 }

2275 case AMDGPU::G_FCMP:

2276 if (Subtarget.hasSALUFloatInsts())

2277 break;

2278 [[fallthrough]];

2279 case AMDGPU::G_ICMP:

2280 case AMDGPU::G_UADDO:

2281 case AMDGPU::G_USUBO:

2282 case AMDGPU::G_UADDE:

2283 case AMDGPU::G_SADDE:

2284 case AMDGPU::G_USUBE:

2285 case AMDGPU::G_SSUBE: {

2286 unsigned BoolDstOp =

2287 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;

2288 Register DstReg = MI.getOperand(BoolDstOp).getReg();

2289

2292 if (DstBank != &AMDGPU::SGPRRegBank)

2293 break;

2294

2295 const bool HasCarryIn = MI.getNumOperands() == 5;

2296

2297

2298

2300 Register NewDstReg = MRI.createGenericVirtualRegister(S32);

2301 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);

2302 MI.getOperand(BoolDstOp).setReg(NewDstReg);

2303

2304 if (HasCarryIn) {

2305 Register NewSrcReg = MRI.createGenericVirtualRegister(S32);

2306 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);

2307 B.buildZExt(NewSrcReg, MI.getOperand(4).getReg());

2308 MI.getOperand(4).setReg(NewSrcReg);

2309 }

2310

2312 B.setInsertPt(*MBB, std::next(MI.getIterator()));

2313

2314

2315

2317 if (DefRegs.empty())

2319 B.buildTrunc(DefRegs[0], NewDstReg);

2320 return;

2321 }

2322 case AMDGPU::G_SELECT: {

2323 Register DstReg = MI.getOperand(0).getReg();

2324 LLT DstTy = MRI.getType(DstReg);

2325

2327 if (CondRegs.empty())

2328 CondRegs.push_back(MI.getOperand(1).getReg());

2329 else {

2331 }

2332

2334 if (CondBank == &AMDGPU::SGPRRegBank) {

2336 Register NewCondReg = MRI.createGenericVirtualRegister(S32);

2337 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

2338

2339 MI.getOperand(1).setReg(NewCondReg);

2340 B.buildZExt(NewCondReg, CondRegs[0]);

2341 }

2342

2344 break;

2345

2347

2351

2352

2353 if (DefRegs.empty()) {

2355 break;

2356 }

2357

2358 if (Src1Regs.empty())

2360 else {

2362 }

2363

2364 if (Src2Regs.empty())

2366 else

2368

2370

2371 auto Flags = MI.getFlags();

2372 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);

2373 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);

2374

2375 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

2376 MI.eraseFromParent();

2377 return;

2378 }

2379 case AMDGPU::G_BRCOND: {

2380 Register CondReg = MI.getOperand(0).getReg();

2381

2384

2385 if (CondBank == &AMDGPU::SGPRRegBank) {

2387 Register NewCondReg = MRI.createGenericVirtualRegister(S32);

2388 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

2389

2390 MI.getOperand(0).setReg(NewCondReg);

2391 B.buildZExt(NewCondReg, CondReg);

2392 return;

2393 }

2394

2395 break;

2396 }

2397 case AMDGPU::G_AND:

2398 case AMDGPU::G_OR:

2399 case AMDGPU::G_XOR: {

2400

2401

2402 Register DstReg = MI.getOperand(0).getReg();

2403 LLT DstTy = MRI.getType(DstReg);

2404

2407

2409 if (DstBank == &AMDGPU::VCCRegBank)

2410 break;

2411

2413 ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

2415

2419 return;

2420 }

2421

2422 if (DstTy.getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {

2426 ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);

2428

2429

2430

2431 if (MI.getOpcode() == AMDGPU::G_XOR &&

2436 } else {

2439 }

2440 return;

2441 }

2442

2444 break;

2445

2450

2451

2452 if (DefRegs.empty()) {

2454 break;

2455 }

2456

2459 (Src0Regs.empty() || Src0Regs.size() == 2));

2460

2461

2462

2463

2464

2465 if (Src0Regs.empty())

2467 else

2469

2470 if (Src1Regs.empty())

2472 else

2474

2476

2477 auto Flags = MI.getFlags();

2478 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);

2479 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);

2480

2481 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

2482 MI.eraseFromParent();

2483 return;

2484 }

2485 case AMDGPU::G_ABS: {

2486 Register SrcReg = MI.getOperand(1).getReg();

2487 const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);

2488

2489

2490

2491 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {

2493 ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);

2495

2498 return;

2499 }

2500 [[fallthrough]];

2501 }

2502 case AMDGPU::G_ADD:

2503 case AMDGPU::G_SUB:

2504 case AMDGPU::G_MUL:

2505 case AMDGPU::G_SHL:

2506 case AMDGPU::G_LSHR:

2507 case AMDGPU::G_ASHR:

2508 case AMDGPU::G_SMIN:

2509 case AMDGPU::G_SMAX:

2510 case AMDGPU::G_UMIN:

2511 case AMDGPU::G_UMAX: {

2512 Register DstReg = MI.getOperand(0).getReg();

2513 LLT DstTy = MRI.getType(DstReg);

2514

2515

2516

2517

2518 if (Subtarget.hasVectorMulU64() && Opc == AMDGPU::G_MUL &&

2521 return;

2522 }

2523

2524

2525

2527 break;

2528

2531 if (DstBank == &AMDGPU::VGPRRegBank)

2532 break;

2533

2537 ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);

2538

2539 if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {

2540 Register WideSrcLo, WideSrcHi;

2541

2542 std::tie(WideSrcLo, WideSrcHi) =

2544 auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});

2545 auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});

2546 B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});

2547 MI.eraseFromParent();

2548 return;

2549 }

2550

2552 Register WideSrc0Lo, WideSrc0Hi;

2553 Register WideSrc1Lo, WideSrc1Hi;

2554

2555 unsigned ExtendOp = getExtendOp(MI.getOpcode());

2556 std::tie(WideSrc0Lo, WideSrc0Hi)

2558 std::tie(WideSrc1Lo, WideSrc1Hi)

2560 auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});

2561 auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});

2562 B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});

2563 MI.eraseFromParent();

2564 } else {

2566

2569

2570

2571 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||

2572 Opc == AMDGPU::G_ASHR) {

2573 B.setInsertPt(*MBB, MI.getIterator());

2576 }

2577 }

2578

2579 return;

2580 }

2581 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:

2582 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {

2583

2584

2585

2586

2587

2588

2589

2590

2591

2592

2594

2595 Register DstReg = MI.getOperand(0).getReg();

2596 Register SrcReg0 = MI.getOperand(1).getReg();

2597 Register SrcReg1 = MI.getOperand(2).getReg();

2600 assert(MRI.getType(DstReg) == S64 && "This is a special case for s_mul_u64 "

2601 "that handles only 64-bit operands.");

2604

2605

2606

2607 if (DstBank == &AMDGPU::SGPRRegBank) {

2608 MI.setDesc(TII->get(AMDGPU::S_MUL_U64));

2609 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);

2610 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);

2611 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);

2612 return;

2613 }

2614

2615

2616

2617 assert(MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&

2618 "The destination operand should be in vector registers.");

2619

2620

2621 Register Op0L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

2622 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);

2623 MRI.setType(Op0L, S32);

2624 B.buildTrunc(Op0L, SrcReg0);

2625

2626

2627 Register Op1L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

2628 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);

2629 MRI.setType(Op1L, S32);

2630 B.buildTrunc(Op1L, SrcReg1);

2631

2632 unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32

2633 ? AMDGPU::G_AMDGPU_MAD_U64_U32

2634 : AMDGPU::G_AMDGPU_MAD_I64_I32;

2635

2637 Register Zero64 = B.buildConstant(S64, 0).getReg(0);

2638 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);

2639 Register CarryOut = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

2640 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);

2641 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});

2642 MI.eraseFromParent();

2643 return;

2644 }

2645 case AMDGPU::G_SEXT_INREG: {

2647 if (SrcRegs.empty())

2648 break;

2649

2651 ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);

2652

2653

2654

2655

2657

2658 int Amt = MI.getOperand(2).getImm();

2659 if (Amt <= 32) {

2660

2661

2662 if (Amt == 32) {

2663

2664 B.buildFreeze(DstRegs[0], SrcRegs[0]);

2665 } else {

2666 auto Freeze = B.buildFreeze(S32, SrcRegs[0]);

2667

2668 B.buildSExtInReg(DstRegs[0], Freeze, Amt);

2669 }

2670

2671 B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));

2672 } else {

2673

2674

2675 B.buildCopy(DstRegs[0], SrcRegs[0]);

2676 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);

2677 }

2678

2679 Register DstReg = MI.getOperand(0).getReg();

2680 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

2681 MI.eraseFromParent();

2682 return;

2683 }

2684 case AMDGPU::G_CTPOP:

2685 case AMDGPU::G_BITREVERSE: {

2688 if (DstBank == &AMDGPU::SGPRRegBank)

2689 break;

2690

2691 Register SrcReg = MI.getOperand(1).getReg();

2693 LLT Ty = MRI.getType(SrcReg);

2694 if (Ty == S32)

2695 break;

2696

2697 ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);

2698

2701

2704 return;

2705 }

2706 case AMDGPU::G_AMDGPU_FFBH_U32:

2707 case AMDGPU::G_AMDGPU_FFBL_B32:

2708 case AMDGPU::G_CTLZ_ZERO_UNDEF:

2709 case AMDGPU::G_CTTZ_ZERO_UNDEF: {

2712 if (DstBank == &AMDGPU::SGPRRegBank)

2713 break;

2714

2715 Register SrcReg = MI.getOperand(1).getReg();

2717 LLT Ty = MRI.getType(SrcReg);

2718 if (Ty == S32)

2719 break;

2720

2721

2722

2723

2724

2725

2726

2727 ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);

2729 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF

2730 ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32

2731 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF

2732 ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32

2734 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;

2735 auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});

2736 auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});

2737 unsigned AddOpc =

2738 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF

2739 ? AMDGPU::G_ADD

2740 : AMDGPU::G_UADDSAT;

2741 Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});

2742 Register DstReg = MI.getOperand(0).getReg();

2743 B.buildUMin(DstReg, X, Y);

2744 MI.eraseFromParent();

2745 return;

2746 }

2747 case AMDGPU::G_SEXT:

2748 case AMDGPU::G_ZEXT:

2749 case AMDGPU::G_ANYEXT: {

2750 Register SrcReg = MI.getOperand(1).getReg();

2751 LLT SrcTy = MRI.getType(SrcReg);

2752 const bool Signed = Opc == AMDGPU::G_SEXT;

2753

2755

2758

2759 Register DstReg = MI.getOperand(0).getReg();

2760 LLT DstTy = MRI.getType(DstReg);

2762 SrcBank != &AMDGPU::SGPRRegBank &&

2763 SrcBank != &AMDGPU::VCCRegBank &&

2764

2765

2767 SrcTy.getSizeInBits() <= 32) {

2769

2770

2772

2773 B.buildSExtOrTrunc(DefRegs[0], SrcReg);

2774 } else if (Opc == AMDGPU::G_ZEXT) {

2775 B.buildZExtOrTrunc(DefRegs[0], SrcReg);

2776 } else {

2777 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);

2778 }

2779

2781 MRI.setRegBank(DstReg, *SrcBank);

2782 MI.eraseFromParent();

2783 return;

2784 }

2785

2787 return;

2788

2789

2790

2791

2792 if (SrcBank == &AMDGPU::VCCRegBank) {

2794

2795 const RegisterBank *DstBank = &AMDGPU::VGPRRegBank;

2796

2798

2799 const bool UseSel64 = DstSize > 32 &&

2800 SrcBank->getID() == AMDGPU::SGPRRegBankID;

2801

2802

2804 auto True = B.buildConstant(SelType, Signed ? -1 : 1);

2805 auto False = B.buildConstant(SelType, 0);

2806

2807 MRI.setRegBank(True.getReg(0), *DstBank);

2808 MRI.setRegBank(False.getReg(0), *DstBank);

2809 MRI.setRegBank(DstReg, *DstBank);

2810

2811 if (DstSize > 32) {

2812 B.buildSelect(DefRegs[0], SrcReg, True, False);

2814 } else if (DstSize < 32) {

2815 auto Sel = B.buildSelect(SelType, SrcReg, True, False);

2816 MRI.setRegBank(Sel.getReg(0), *DstBank);

2817 B.buildTrunc(DstReg, Sel);

2818 } else {

2819 B.buildSelect(DstReg, SrcReg, True, False);

2820 }

2821

2822 MI.eraseFromParent();

2823 return;

2824 }

2825

2826 break;

2827 }

2828 case AMDGPU::G_EXTRACT_VECTOR_ELT: {

2830

2832

2833 Register DstReg = MI.getOperand(0).getReg();

2834 Register SrcReg = MI.getOperand(1).getReg();

2835

2837 LLT DstTy = MRI.getType(DstReg);

2838 LLT SrcTy = MRI.getType(SrcReg);

2839

2840 if (foldExtractEltToCmpSelect(B, MI, OpdMapper))

2841 return;

2842

2850

2852 unsigned ConstOffset;

2853 std::tie(BaseIdxReg, ConstOffset) =

2855

2856

2857

2858

2859

2860 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&

2861 ConstOffset > 0 &&

2862 ConstOffset < SrcTy.getNumElements();

2863

2864

2865 if (ShouldMoveIndexIntoLoop)

2866 MI.getOperand(2).setReg(BaseIdxReg);

2867

2868

2869

2870

2871

2872 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&

2873 SrcBank == &AMDGPU::SGPRRegBank;

2874 if (DstRegs.empty()) {

2876

2878

2879 if (NeedCopyToVGPR) {

2880

2881 Register TmpReg = MRI.createGenericVirtualRegister(DstTy);

2882 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);

2883 MI.getOperand(0).setReg(TmpReg);

2884 B.setInsertPt(*MI.getParent(), ++MI.getIterator());

2885

2886

2888 }

2889

2890

2891 if (ShouldMoveIndexIntoLoop)

2893

2894 return;

2895 }

2896

2898

2900

2901 auto CastSrc = B.buildBitcast(Vec32, SrcReg);

2902 auto One = B.buildConstant(S32, 1);

2903

2905 // Split the vector index into 32-bit pieces. Prepare to move all of the
2906 // new instructions into a waterfall loop if necessary.
2907 //
2908 // Don't put the bitcast or constant in the loop.
2909
2910 MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
2911
2912 // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).

2913 auto IdxLo = B.buildShl(S32, BaseIdxReg, One);

2914 auto IdxHi = B.buildAdd(S32, IdxLo, One);

2915

2916 auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);

2917 auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);

2918

2919 MRI.setRegBank(DstReg, *DstBank);

2920 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);

2921 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);

2922 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);

2923 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

2924
2925 SmallSet<Register, 4> OpsToWaterfall;
2926 if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, {2})) {

2927 MI.eraseFromParent();

2928 return;

2929 }

2930

2931

2932

2933 B.setInstr(*Span.begin());

2934 MI.eraseFromParent();

2935 executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
2936 OpsToWaterfall);

2937

2938 if (NeedCopyToVGPR) {

2939 MachineBasicBlock *LoopBB = Extract1->getParent();
2940 Register TmpReg0 = MRI.createGenericVirtualRegister(S32);

2941 Register TmpReg1 = MRI.createGenericVirtualRegister(S32);

2942 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);

2943 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);

2944

2945 Extract0->getOperand(0).setReg(TmpReg0);

2946 Extract1->getOperand(0).setReg(TmpReg1);

2947

2948 B.setInsertPt(*LoopBB, ++Extract1->getIterator());

2949
2950 B.buildCopy(DstRegs[0], TmpReg0);
2951 B.buildCopy(DstRegs[1], TmpReg1);
2952 }

2953

2954 if (ShouldMoveIndexIntoLoop)

2955 reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);
2956

2957 return;

2958 }
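// Summary of the dynamic-index extract path above: a 64-bit element extract
// becomes two 32-bit extracts from a bitcast <2N x s32> vector using indices
// (2 * Idx, 2 * Idx + 1). When the index is divergent, every instruction
// recorded in Span is wrapped in a waterfall loop that readfirstlanes the
// index per iteration; the bitcast and the constant 1 intentionally stay
// outside the loop so they are not re-executed.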

2959 case AMDGPU::G_INSERT_VECTOR_ELT: {

2960 SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
2961

2962 Register DstReg = MI.getOperand(0).getReg();

2963 LLT VecTy = MRI.getType(DstReg);

2964
2965 assert(OpdMapper.getVRegs(0).empty());
2966 assert(OpdMapper.getVRegs(3).empty());
2967
2968 if (substituteSimpleCopyRegs(OpdMapper, 1))

2969 MRI.setType(MI.getOperand(1).getReg(), VecTy);

2970

2971 if (foldInsertEltToCmpSelect(B, MI, OpdMapper))

2972 return;

2973

2974 const RegisterBank *IdxBank =
2975 OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2976

2977 Register SrcReg = MI.getOperand(1).getReg();

2978 Register InsReg = MI.getOperand(2).getReg();

2979 LLT InsTy = MRI.getType(InsReg);

2980 (void)InsTy;

2981
2982 Register BaseIdxReg;

2983 unsigned ConstOffset;

2984 std::tie(BaseIdxReg, ConstOffset) =

2985 AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(3).getReg());
2986

2987

2988

2989

2990

2991 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&

2992 ConstOffset > 0 &&

2993 ConstOffset < VecTy.getNumElements();
2994

2995

2996 if (ShouldMoveIndexIntoLoop)

2997 MI.getOperand(3).setReg(BaseIdxReg);

2998

2999

3000 if (InsRegs.empty()) {

3001 executeInWaterfallLoop(B, MI, {3});
3002

3003

3004 if (ShouldMoveIndexIntoLoop) {

3005 reinsertVectorIndexAdd(B, MI, 3, ConstOffset);
3006 }

3007

3008 return;

3009 }

3010

3011 assert(InsTy.getSizeInBits() == 64);
3012
3013 const LLT S32 = LLT::scalar(32);
3014 LLT Vec32 = LLT::fixed_vector(2 * VecTy.getNumElements(), 32);

3016 auto CastSrc = B.buildBitcast(Vec32, SrcReg);

3017 auto One = B.buildConstant(S32, 1);

3018

3019 // Split the vector index into 32-bit pieces. Prepare to move all of the
3020 // new instructions into a waterfall loop if necessary.
3021 //
3022 // Don't put the bitcast or constant in the loop.
3023 MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
3024

3025

3026 auto IdxLo = B.buildShl(S32, BaseIdxReg, One);

3027 auto IdxHi = B.buildAdd(S32, IdxLo, One);

3028

3029 auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);

3030 auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);

3031

3032 const RegisterBank *DstBank =
3033 OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
3034 const RegisterBank *SrcBank =
3035 OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
3036 const RegisterBank *InsSrcBank =
3037 OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
3038

3039 MRI.setRegBank(InsReg, *InsSrcBank);

3040 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);

3041 MRI.setRegBank(InsLo.getReg(0), *DstBank);

3042 MRI.setRegBank(InsHi.getReg(0), *DstBank);

3043 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);

3044 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);

3045 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

3046

3047
3048 SmallSet<Register, 4> OpsToWaterfall;
3049 if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, {3})) {
3050 B.setInsertPt(B.getMBB(), MI);

3051 B.buildBitcast(DstReg, InsHi);

3052 MI.eraseFromParent();

3053 return;

3054 }

3055

3056 B.setInstr(*Span.begin());

3057 MI.eraseFromParent();

3058

3059

3060

3061 executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
3062 OpsToWaterfall);

3063

3064

3065

3066

3067

3068 B.buildBitcast(DstReg, InsHi);

3069

3070

3071 if (ShouldMoveIndexIntoLoop)

3072 reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);
3073

3074 return;

3075 }
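// The insert path mirrors the extract path: a 64-bit element insert is done
// as two chained 32-bit G_INSERT_VECTOR_ELT on the <2N x s32> bitcast, and
// only the final bitcast back to the original vector type is kept out of the
// waterfall loop, which avoids an extra phi for it inside the loop.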

3076 case AMDGPU::G_AMDGPU_BUFFER_LOAD:

3077 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:

3078 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:

3079 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:

3080 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:

3081 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:

3082 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:

3083 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:

3084 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:

3085 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:

3086 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:

3087 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:

3088 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:

3089 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:

3090 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:

3091 case AMDGPU::G_AMDGPU_BUFFER_STORE:

3092 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:

3093 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:

3094 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:

3095 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:

3096 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:

3097 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {

3098 applyDefaultMapping(OpdMapper);
3099 executeInWaterfallLoop(B, MI, {1, 4});
3100 return;

3101 }
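// Common to every buffer load/store variant above: the resource descriptor
// (operand 1) and the scalar offset (operand 4) must ultimately be uniform
// SGPRs, so after applyDefaultMapping the waterfall loop over {1, 4}
// legalizes any divergent inputs in those positions.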

3102 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:

3103 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:

3104 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:

3105 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:

3106 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:

3107 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:

3108 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:

3109 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:

3110 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:

3111 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:

3112 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:

3113 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:

3114 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:

3115 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:

3116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:

3117 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:

3118 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {

3119 applyDefaultMapping(OpdMapper);
3120 executeInWaterfallLoop(B, MI, {2, 5});
3121 return;

3122 }

3123 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {

3124 applyDefaultMapping(OpdMapper);
3125 executeInWaterfallLoop(B, MI, {3, 6});
3126 return;

3127 }

3128 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:

3129 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:

3130 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:

3131 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:

3132 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {

3133 applyMappingSBufferLoad(B, OpdMapper);
3134 return;

3135 }

3136 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:

3137 constrainOpWithReadfirstlane(B, MI, 0);
3138 constrainOpWithReadfirstlane(B, MI, 2);
3139 return;

3140 case AMDGPU::G_INTRINSIC:

3141 case AMDGPU::G_INTRINSIC_CONVERGENT: {

3142 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
3143 case Intrinsic::amdgcn_readlane: {

3144 substituteSimpleCopyRegs(OpdMapper, 2);
3145
3146 assert(OpdMapper.getVRegs(0).empty());
3147 assert(OpdMapper.getVRegs(3).empty());
3148
3149 // Make sure the index is an SGPR. It doesn't make sense to run this in a
3150 // waterfall loop, so assume it's a uniform value.
3151 constrainOpWithReadfirstlane(B, MI, 3); // Index

3152 return;

3153 }

3154 case Intrinsic::amdgcn_writelane: {

3155 assert(OpdMapper.getVRegs(0).empty());
3156 assert(OpdMapper.getVRegs(2).empty());
3157 assert(OpdMapper.getVRegs(3).empty());
3158
3159 substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
3160 constrainOpWithReadfirstlane(B, MI, 2); // Source value
3161 constrainOpWithReadfirstlane(B, MI, 3); // Index

3162 return;

3163 }

3164 case Intrinsic::amdgcn_interp_p1:

3165 case Intrinsic::amdgcn_interp_p2:

3166 case Intrinsic::amdgcn_interp_mov:

3167 case Intrinsic::amdgcn_interp_p1_f16:

3168 case Intrinsic::amdgcn_interp_p2_f16:

3169 case Intrinsic::amdgcn_lds_param_load: {

3170 applyDefaultMapping(OpdMapper);
3171
3172 // Readlane for m0 value, which is always the last operand.
3173 // FIXME: Should this be a waterfall loop instead?
3174 constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index

3175 return;

3176 }

3177 case Intrinsic::amdgcn_interp_inreg_p10:

3178 case Intrinsic::amdgcn_interp_inreg_p2:

3179 case Intrinsic::amdgcn_interp_inreg_p10_f16:

3180 case Intrinsic::amdgcn_interp_inreg_p2_f16:

3181 case Intrinsic::amdgcn_interp_p10_rtz_f16:

3182 case Intrinsic::amdgcn_interp_p2_rtz_f16:

3183 case Intrinsic::amdgcn_permlane16_swap:

3184 case Intrinsic::amdgcn_permlane32_swap:

3185 applyDefaultMapping(OpdMapper);
3186 return;

3187 case Intrinsic::amdgcn_permlane16:

3188 case Intrinsic::amdgcn_permlanex16: {

3189 // Doing a waterfall loop over these wouldn't make any sense.
3190 substituteSimpleCopyRegs(OpdMapper, 2);
3191 substituteSimpleCopyRegs(OpdMapper, 3);
3192 constrainOpWithReadfirstlane(B, MI, 4);
3193 constrainOpWithReadfirstlane(B, MI, 5);
3194 return;

3195 }

3196 case Intrinsic::amdgcn_permlane_bcast:

3197 case Intrinsic::amdgcn_permlane_up:

3198 case Intrinsic::amdgcn_permlane_down:

3199 case Intrinsic::amdgcn_permlane_xor:

3200

3201 constrainOpWithReadfirstlane(B, MI, 3);
3202 constrainOpWithReadfirstlane(B, MI, 4);
3203 return;

3204 case Intrinsic::amdgcn_permlane_idx_gen: {

3205 constrainOpWithReadfirstlane(B, MI, 3);
3206 return;

3207 }

3208 case Intrinsic::amdgcn_sbfe:

3209 applyMappingBFE(B, OpdMapper, true);
3210 return;

3211 case Intrinsic::amdgcn_ubfe:

3212 applyMappingBFE(B, OpdMapper, false);
3213 return;

3214 case Intrinsic::amdgcn_inverse_ballot:

3215 case Intrinsic::amdgcn_s_bitreplicate:

3216 case Intrinsic::amdgcn_s_quadmask:

3217 case Intrinsic::amdgcn_s_wqm:

3218 applyDefaultMapping(OpdMapper);
3219 constrainOpWithReadfirstlane(B, MI, 2); // Mask
3220 return;

3221 case Intrinsic::amdgcn_ballot:

3222

3223 break;

3224 }

3225 break;

3226 }

3227 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:

3228 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:

3229 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:

3230 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:

3231 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {

3232 const AMDGPU::RsrcIntrinsic *RSrcIntrin =
3233 AMDGPU::lookupRsrcIntrinsic(AMDGPU::getIntrinsicID(MI));
3234 assert(RSrcIntrin && RSrcIntrin->IsImage);
3235 // Non-images can have complications from operands that allow both SGPR
3236 // and VGPR. For now it's too complicated to figure out the final opcode
3237 // to derive the register bank from the MCInstrDesc.
3238 applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);

3239 return;

3240 }

3241 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:

3242 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:

3243 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {

3244 bool IsDualOrBVH8 =

3245 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||

3246 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;

3247 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;

3248 unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;

3249 applyDefaultMapping(OpdMapper);
3250 executeInWaterfallLoop(B, MI, {LastRegOpIdx});
3251 return;

3252 }

3253 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:

3254 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {

3255 Intrinsic::ID IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
3256 switch (IntrID) {

3257 case Intrinsic::amdgcn_ds_ordered_add:

3258 case Intrinsic::amdgcn_ds_ordered_swap: {

3259 // This is only allowed to execute with 1 lane, so readfirstlane is safe.
3260 assert(OpdMapper.getVRegs(0).empty());
3261 substituteSimpleCopyRegs(OpdMapper, 3);
3262 constrainOpWithReadfirstlane(B, MI, 2); // M0
3263 return;

3264 }

3265 case Intrinsic::amdgcn_ds_gws_init:

3266 case Intrinsic::amdgcn_ds_gws_barrier:

3267 case Intrinsic::amdgcn_ds_gws_sema_br: {

3268 // Only the first lane executes, so readfirstlane is safe.
3269 substituteSimpleCopyRegs(OpdMapper, 1);
3270 constrainOpWithReadfirstlane(B, MI, 2); // M0
3271 return;

3272 }

3273 case Intrinsic::amdgcn_ds_gws_sema_v:

3274 case Intrinsic::amdgcn_ds_gws_sema_p:

3275 case Intrinsic::amdgcn_ds_gws_sema_release_all: {

3276
3277 constrainOpWithReadfirstlane(B, MI, 1); // M0
3278 return;

3279 }

3280 case Intrinsic::amdgcn_ds_append:

3281 case Intrinsic::amdgcn_ds_consume: {

3282 constrainOpWithReadfirstlane(B, MI, 2); // M0
3283 return;

3284 }

3285 case Intrinsic::amdgcn_s_sendmsg:

3286 case Intrinsic::amdgcn_s_sendmsghalt: {

3287
3288 constrainOpWithReadfirstlane(B, MI, 2); // M0
3289 return;

3290 }

3291 case Intrinsic::amdgcn_s_setreg: {

3292 constrainOpWithReadfirstlane(B, MI, 2);
3293 return;

3294 }

3295 case Intrinsic::amdgcn_s_ttracedata:

3296 constrainOpWithReadfirstlane(B, MI, 1);
3297 return;

3298 case Intrinsic::amdgcn_raw_buffer_load_lds:

3299 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {

3300 applyDefaultMapping(OpdMapper);
3301 constrainOpWithReadfirstlane(B, MI, 1); // rsrc
3302 constrainOpWithReadfirstlane(B, MI, 2); // M0
3303 constrainOpWithReadfirstlane(B, MI, 5); // soffset
3304 return;

3305 }

3306 case Intrinsic::amdgcn_struct_buffer_load_lds:

3307 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {

3308 applyDefaultMapping(OpdMapper);
3309 constrainOpWithReadfirstlane(B, MI, 1); // rsrc
3310 constrainOpWithReadfirstlane(B, MI, 2); // M0
3311 constrainOpWithReadfirstlane(B, MI, 6); // soffset
3312 return;

3313 }

3314 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:

3315 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:

3316 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:

3317 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {

3320 return;

3321 }

3322 case Intrinsic::amdgcn_load_to_lds:

3323 case Intrinsic::amdgcn_global_load_lds: {

3324 applyDefaultMapping(OpdMapper);
3325 constrainOpWithReadfirstlane(B, MI, 2); // M0
3326 return;

3327 }

3328 case Intrinsic::amdgcn_lds_direct_load: {

3329 applyDefaultMapping(OpdMapper);
3330 // Readlane for m0 value, which is always the last operand.
3331 constrainOpWithReadfirstlane(B, MI, MI.getNumOperands() - 1); // Index

3332 return;

3333 }

3334 case Intrinsic::amdgcn_exp_row:

3335 applyDefaultMapping(OpdMapper);
3336 constrainOpWithReadfirstlane(B, MI, 8); // M0
3337 return;

3338 case Intrinsic::amdgcn_cluster_load_b32:

3339 case Intrinsic::amdgcn_cluster_load_b64:

3340 case Intrinsic::amdgcn_cluster_load_b128: {

3343 return;

3344 }

3345 case Intrinsic::amdgcn_s_sleep_var:

3346 assert(OpdMapper.getVRegs(1).empty());
3347 constrainOpWithReadfirstlane(B, MI, 1);
3348 return;

3349 case Intrinsic::amdgcn_s_barrier_join:

3350 constrainOpWithReadfirstlane(B, MI, 1);
3351 return;

3352 case Intrinsic::amdgcn_s_barrier_init:

3353 case Intrinsic::amdgcn_s_barrier_signal_var:

3354 constrainOpWithReadfirstlane(B, MI, 1);
3355 constrainOpWithReadfirstlane(B, MI, 2);
3356 return;

3357 case Intrinsic::amdgcn_s_get_barrier_state:

3358 case Intrinsic::amdgcn_s_get_named_barrier_state: {

3359 constrainOpWithReadfirstlane(B, MI, 2);
3360 return;

3361 }

3362 case Intrinsic::amdgcn_s_prefetch_data: {

3363 Register PtrReg = MI.getOperand(1).getReg();

3364 unsigned AS = MRI.getType(PtrReg).getAddressSpace();

3365 if (AMDGPU::isFlatGlobalAddrSpace(AS)) {
3366 constrainOpWithReadfirstlane(B, MI, 1); // Ptr
3367 constrainOpWithReadfirstlane(B, MI, 2); // Offset
3368 } else

3369 MI.eraseFromParent();

3370 return;

3371 }

3372 case Intrinsic::amdgcn_tensor_load_to_lds:

3373 case Intrinsic::amdgcn_tensor_store_from_lds: {

3374 constrainOpWithReadfirstlane(B, MI, 1);
3375 constrainOpWithReadfirstlane(B, MI, 2);
3376 constrainOpWithReadfirstlane(B, MI, 3);
3377 constrainOpWithReadfirstlane(B, MI, 4);
3378 return;

3379 }

3380 case Intrinsic::amdgcn_tensor_load_to_lds_d2:

3381 case Intrinsic::amdgcn_tensor_store_from_lds_d2: {

3382 constrainOpWithReadfirstlane(B, MI, 1);
3383 constrainOpWithReadfirstlane(B, MI, 2);
3384 return;

3385 }

3386 default: {

3387 if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
3388 AMDGPU::lookupRsrcIntrinsic(IntrID)) {
3389 // Non-images can have complications from operands that allow both SGPR

3390

3391

3392 if (RSrcIntrin->IsImage) {

3393 applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
3394 return;

3395 }

3396 }

3397

3398 break;

3399 }

3400 }

3401 break;

3402 }

3403 case AMDGPU::G_SI_CALL: {

3404

3405

3406 SmallSet<Register, 4> SGPROperandRegs;
3407
3408 if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, {1}))
3409 break;

3410

3411

3412

3413

3414 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;

3415 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;

3416

3417

3418

3420

3419 SmallVector<MachineInstr *, 4> NonCopyInstrs;
3421 unsigned NonCopyInstrsLen = 0;
3422 MachineBasicBlock::iterator Start(&MI);
3423 MachineBasicBlock::iterator LastCopy = Start;
3424 MachineBasicBlock *MBB = MI.getParent();
3425 const SIMachineFunctionInfo *Info =
3426 MBB->getParent()->getInfo<SIMachineFunctionInfo>();

3427 while (Start->getOpcode() != FrameSetupOpcode) {

3428 --Start;

3429 bool IsCopy = false;

3430 if (Start->getOpcode() == AMDGPU::COPY) {

3431 auto &Dst = Start->getOperand(0);

3432 if (Dst.isReg()) {

3433 Register Reg = Dst.getReg();

3434 if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {

3435 IsCopy = true;

3436 } else {

3437

3438

3439 auto &Src = Start->getOperand(1);

3440 if (Src.isReg()) {

3441 Reg = Src.getReg();

3442 IsCopy = Info->getScratchRSrcReg() == Reg;

3443 }

3444 }

3445 }

3446 }

3447

3448 if (IsCopy) {

3449 LastCopy = Start;

3450 NonCopyInstrsLen = NonCopyInstrs.size();

3451 } else {

3452 NonCopyInstrs.push_back(&*Start);

3453 }

3454 }

3455 NonCopyInstrs.resize(NonCopyInstrsLen);

3456

3457 for (auto *NonCopy : reverse(NonCopyInstrs)) {

3458 MBB->splice(LastCopy, MBB, NonCopy->getIterator());

3459 }

3460 Start = LastCopy;

3461

3462

3463 NonCopyInstrs.clear();

3464 NonCopyInstrsLen = 0;

3466 LastCopy = End;

3467 while (End->getOpcode() != FrameDestroyOpcode) {

3468 ++End;

3469 bool IsCopy = false;

3470 if (End->getOpcode() == AMDGPU::COPY) {

3471 auto &Src = End->getOperand(1);

3472 if (Src.isReg()) {

3473 Register Reg = Src.getReg();

3474 IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);

3475 }

3476 }

3477

3478 if (IsCopy) {

3479 LastCopy = End;

3480 NonCopyInstrsLen = NonCopyInstrs.size();

3481 } else {

3482 NonCopyInstrs.push_back(&*End);
3483 }

3484 }

3485 NonCopyInstrs.resize(NonCopyInstrsLen);

3486

3487 End = LastCopy;

3488 ++LastCopy;

3489 for (auto *NonCopy : reverse(NonCopyInstrs)) {

3490 MBB->splice(LastCopy, MBB, NonCopy->getIterator());

3491 }

3492

3493 ++End;

3494 B.setInsertPt(B.getMBB(), Start);

3495 executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
3496 break;

3497 }
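// Rough shape of the G_SI_CALL fixup above: copies into the physical SGPR
// argument registers (and copies out of physical result registers) must stay
// adjacent to the call inside the ADJCALLSTACKUP/ADJCALLSTACKDOWN bracket, so
// all unrelated instructions found in that bracket are spliced out of it.
// The remaining [Start, End) range, now just the call plus its copies, is
// what executeInWaterfallLoop wraps when the callee address is divergent.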

3498 case AMDGPU::G_LOAD:

3499 case AMDGPU::G_ZEXTLOAD:

3500 case AMDGPU::G_SEXTLOAD: {

3501 if (applyMappingLoad(B, OpdMapper, MI))
3502 return;

3503 break;

3504 }

3505 case AMDGPU::G_DYN_STACKALLOC:

3506 applyMappingDynStackAlloc(B, OpdMapper, MI);
3507 return;

3508 case AMDGPU::G_STACKRESTORE: {

3509 applyDefaultMapping(OpdMapper);
3510 constrainOpWithReadfirstlane(B, MI, 0);
3511 return;

3512 }

3513 case AMDGPU::G_SBFX:

3514 applyMappingBFE(B, OpdMapper, /*Signed*/ true);
3515 return;

3516 case AMDGPU::G_UBFX:

3517 applyMappingBFE(B, OpdMapper, /*Signed*/ false);
3518 return;

3519 case AMDGPU::G_AMDGPU_MAD_U64_U32:

3520 case AMDGPU::G_AMDGPU_MAD_I64_I32:

3521 applyMappingMAD_64_32(B, OpdMapper);
3522 return;

3523 case AMDGPU::G_PREFETCH: {

3524 if (!Subtarget.hasSafeSmemPrefetch() && !Subtarget.hasVmemPrefInsts()) {

3525 MI.eraseFromParent();

3526 return;

3527 }

3528 Register PtrReg = MI.getOperand(0).getReg();

3529 unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);

3530 if (PtrBank == AMDGPU::VGPRRegBankID &&

3531 (!Subtarget.hasVmemPrefInsts() || !MI.getOperand(3).getImm())) {
3532 // Cannot do I$ prefetch with divergent pointer.

3533 MI.eraseFromParent();

3534 return;

3535 }

3536 unsigned AS = MRI.getType(PtrReg).getAddressSpace();

3537 if (!(AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
3539 (Subtarget.hasSafeSmemPrefetch() &&
3540 (AS == AMDGPUAS::FLAT_ADDRESS ||
3541 !MI.getOperand(3).getImm() /* I$ prefetch */)))) {

3542 MI.eraseFromParent();

3543 return;

3544 }

3545 applyDefaultMapping(OpdMapper);
3546 return;

3547 }
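// Prefetch legality in short: without any prefetch instructions the op is
// simply deleted (it has no observable semantics); a divergent pointer or an
// unsupported address space likewise erases it, and only the remaining cases
// keep the instruction with its default mapping.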

3548 default:

3549 break;

3550 }

3551

3552 return applyDefaultMapping(OpdMapper);
3553}

3554

3555

3556

3557

3558
3559 static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
3560 if (RB0 == AMDGPU::InvalidRegBankID)

3561 return RB1;

3562 if (RB1 == AMDGPU::InvalidRegBankID)

3563 return RB0;

3564

3565 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)

3566 return AMDGPU::SGPRRegBankID;

3567

3568 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)

3569 return AMDGPU::AGPRRegBankID;

3570

3571 return AMDGPU::VGPRRegBankID;

3572}
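// A quick truth table for regBankUnion (InvalidRegBankID is the identity):
//   sgpr U sgpr -> sgpr
//   agpr U agpr -> agpr
//   sgpr U vgpr -> vgpr
//   agpr U sgpr -> vgpr
// i.e. any mix that is not uniformly SGPR or uniformly AGPR falls back to
// VGPR, the bank every value can legally live in.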

3573

3574 static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1) {
3575 if (RB0 == AMDGPU::InvalidRegBankID)

3576 return RB1;

3577 if (RB1 == AMDGPU::InvalidRegBankID)

3578 return RB0;

3579

3580

3581

3582

3583 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)

3584 return AMDGPU::VCCRegBankID;

3585

3586

3587 return regBankUnion(RB0, RB1);
3588}

3589

3590 unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
3591 const MachineInstr &MI) const {
3592 unsigned RegBank = AMDGPU::InvalidRegBankID;

3593

3594 for (const MachineOperand &MO : MI.operands()) {
3595 if (!MO.isReg())

3596 continue;

3598 if (const RegisterBank *Bank = getRegBank(MO.getReg(), MRI, *TRI)) {
3599 RegBank = regBankUnion(RegBank, Bank->getID());

3600 if (RegBank == AMDGPU::VGPRRegBankID)

3601 break;

3602 }

3603 }

3604

3605 return RegBank;

3606}

3607

3608 bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
3609 const MachineFunction &MF = *MI.getParent()->getParent();
3610 const MachineRegisterInfo &MRI = MF.getRegInfo();
3611 for (const MachineOperand &MO : MI.operands()) {
3612 if (!MO.isReg())

3613 continue;

3614
3615 if (const RegisterBank *Bank = getRegBank(MO.getReg(), MRI, *TRI)) {
3616 if (Bank->getID() != AMDGPU::SGPRRegBankID)

3617 return false;

3618 }

3619 }

3620 return true;

3621}
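// isSALUMapping answers "could this instruction stay scalar?": it returns
// true only if no register operand has already been assigned to a non-SGPR
// bank. The default-mapping helpers below use it to pick between the SOP
// (scalar) and VOP (vector) operand tables.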

3622

3623 const RegisterBankInfo::InstructionMapping &
3624 AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
3625 const MachineFunction &MF = *MI.getParent()->getParent();
3626 const MachineRegisterInfo &MRI = MF.getRegInfo();
3627 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
3628

3629 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

3630 const MachineOperand &SrcOp = MI.getOperand(i);
3631 if (!SrcOp.isReg())

3632 continue;

3633

3634 unsigned Size = getSizeInBits(SrcOp.getReg(), MRI, *TRI);
3635 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

3636 }

3637 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3638 MI.getNumOperands());

3639}

3640

3641 const RegisterBankInfo::InstructionMapping &
3642 AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
3643 const MachineFunction &MF = *MI.getParent()->getParent();
3644 const MachineRegisterInfo &MRI = MF.getRegInfo();
3645 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
3646

3647

3648

3649

3650

3651 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

3652 const MachineOperand &Src = MI.getOperand(i);
3653 if (!Src.isReg())

3654 continue;

3655

3656 unsigned Size = getSizeInBits(Src.getReg(), MRI, *TRI);
3657 unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;

3658 OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);

3659 }

3660

3661 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3662 MI.getNumOperands());

3663}
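// In the VOP table above only bit-width matters: s1 operands are assumed to
// be conditions and go to VCC, while every other operand is mapped to VGPRs
// of its own size.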

3664

3665 const RegisterBankInfo::InstructionMapping &
3666 AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
3667 const MachineFunction &MF = *MI.getParent()->getParent();
3668 const MachineRegisterInfo &MRI = MF.getRegInfo();
3669 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
3670

3671 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {

3672 const MachineOperand &Op = MI.getOperand(I);
3673 if (!Op.isReg())

3674 continue;

3675

3676 unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
3677 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3678 }

3679

3680 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3681 MI.getNumOperands());

3682}

3683

3684 const RegisterBankInfo::InstructionMapping &
3685 AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
3686 const MachineInstr &MI,
3687 int RsrcIdx) const {

3688

3689

3690 RsrcIdx += MI.getNumExplicitDefs() + 1;

3691

3692 const int NumOps = MI.getNumOperands();

3693 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
3694

3695

3696

3697 for (int I = 0; I != NumOps; ++I) {

3698 if (!MI.getOperand(I).isReg())

3699 continue;

3700

3701 Register OpReg = MI.getOperand(I).getReg();

3702

3703 if (!OpReg)

3704 continue;

3705

3706 unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
3707

3708

3709

3710

3711

3712 const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;

3713

3714 if (MustBeSGPR) {

3715

3716 unsigned NewBank = getRegBankID(OpReg, MRI, AMDGPU::SGPRRegBankID);

3717 OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);

3718 } else {

3719

3720 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3721 }

3722 }

3723

3724 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
3725}

3726

3727

3728 const RegisterBankInfo::ValueMapping *
3729 AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
3730 Register PtrReg) const {
3731 LLT PtrTy = MRI.getType(PtrReg);
3732 unsigned Size = PtrTy.getSizeInBits();
3733 if (Subtarget.useFlatForGlobal() ||
3734 !AMDGPU::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
3735 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3736

3737

3738

3739 const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
3740 return AMDGPU::getValueMapping(PtrBank->getID(), Size);

3741}

3742

3743 const RegisterBankInfo::InstructionMapping &
3744 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
3745
3746 const MachineFunction &MF = *MI.getParent()->getParent();
3747 const MachineRegisterInfo &MRI = MF.getRegInfo();
3748 SmallVector<const ValueMapping*, 2> OpdsMapping(2);
3749 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
3750 Register PtrReg = MI.getOperand(1).getReg();
3751 LLT PtrTy = MRI.getType(PtrReg);
3752 unsigned AS = PtrTy.getAddressSpace();
3753 unsigned PtrSize = PtrTy.getSizeInBits();
3754
3755 const ValueMapping *ValMapping;
3756 const ValueMapping *PtrMapping;
3757
3758 const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
3759
3760 if (PtrBank == &AMDGPU::SGPRRegBank && AMDGPU::isFlatGlobalAddrSpace(AS)) {
3761 if (isScalarLoadLegal(MI)) {
3762 // We have a uniform instruction so we want to use an SMRD load

3763 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

3764 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);

3765 } else {

3766 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3767

3768

3769

3770 unsigned PtrBankID = Subtarget.useFlatForGlobal() ?

3771 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;

3772

3773 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);

3774 }

3775 } else {

3776 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3777 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);

3778 }

3779

3780 OpdsMapping[0] = ValMapping;

3781 OpdsMapping[1] = PtrMapping;

3782 const InstructionMapping &Mapping = getInstructionMapping(
3783 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
3784 return Mapping;

3785

3786

3787

3788}
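// Load-mapping decision, as a sketch: the scalar (SMRD) form is chosen only
// when the pointer is already in the SGPR bank, the address space is
// flat/global/constant, and isScalarLoadLegal accepts the memory operand;
// everything else maps the result to VGPRs, with the pointer bank depending
// on whether global accesses go through FLAT (VGPR pointer) or can use an
// SGPR base.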

3789

3790unsigned

3791 AMDGPURegisterBankInfo::getRegBankID(Register Reg,
3792 const MachineRegisterInfo &MRI,
3793 unsigned Default) const {
3794 const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
3795 return Bank ? Bank->getID() : Default;
3796}

3797

3798 const RegisterBankInfo::ValueMapping *
3799 AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
3800 const MachineRegisterInfo &MRI,
3801 const TargetRegisterInfo &TRI) const {
3802 // Lie and claim anything is legal, even though this needs to be an SGPR
3803 // applyMapping will have to deal with it as a waterfall loop.

3804 unsigned Bank = getRegBankID(Reg, MRI, AMDGPU::SGPRRegBankID);

3805 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3806 return AMDGPU::getValueMapping(Bank, Size);

3807}

3808

3809 const RegisterBankInfo::ValueMapping *
3810 AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
3811 const MachineRegisterInfo &MRI,
3812 const TargetRegisterInfo &TRI) const {
3813 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3814 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

3815}

3816

3817 const RegisterBankInfo::ValueMapping *
3818 AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
3819 const MachineRegisterInfo &MRI,
3820 const TargetRegisterInfo &TRI) const {
3821 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3822 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);

3823}

3824

3825

3826

3827

3828

3829

3830

3831

3832

3833

3834 const RegisterBankInfo::InstructionMapping &
3835 AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
3836 const MachineFunction &MF = *MI.getParent()->getParent();
3837 const MachineRegisterInfo &MRI = MF.getRegInfo();
3838

3839 if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {

3840 Register DstReg = MI.getOperand(0).getReg();

3841 Register SrcReg = MI.getOperand(1).getReg();

3842

3843

3844

3845 const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
3846 const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
3847

3848

3849

3850

3851 unsigned Size;

3852 if (!SrcReg.isVirtual() && !DstBank &&
3853 MRI.getType(DstReg) == LLT::scalar(1)) {
3854 DstBank = &AMDGPU::VCCRegBank;
3855 Size = 1;
3856 } else if (!DstReg.isVirtual() && !SrcBank &&
3857 MRI.getType(SrcReg) == LLT::scalar(1)) {
3858 DstBank = &AMDGPU::VCCRegBank; Size = 1;
3859 } else {
3860 Size = getSizeInBits(DstReg, MRI, *TRI);
3861 }

3862

3863 if (!DstBank)

3864 DstBank = SrcBank;

3865 else if (!SrcBank)

3866 SrcBank = DstBank;

3867

3868 if (MI.getOpcode() != AMDGPU::G_FREEZE &&

3869 cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))
3870 return getInvalidInstructionMapping();
3871
3872 const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank);

3873 unsigned OpdsMappingSize = MI.isCopy() ? 1 : 2;

3874 SmallVector<const ValueMapping *, 2> OpdsMapping(OpdsMappingSize);
3875 OpdsMapping[0] = &ValMap;

3876 if (MI.getOpcode() == AMDGPU::G_FREEZE)

3877 OpdsMapping[1] = &ValMap;

3878

3879 return getInstructionMapping(
3880 1, 1,

3881 getOperandsMapping(OpdsMapping), OpdsMappingSize);

3882 }

3883

3884 if (MI.isRegSequence()) {

3885

3886

3887 unsigned BankID = AMDGPU::SGPRRegBankID;

3888

3889 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {

3890 unsigned OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI);
3891

3892

3893 if (OpBank != AMDGPU::SGPRRegBankID) {

3894 BankID = AMDGPU::VGPRRegBankID;

3895 break;

3896 }

3897 }

3898 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3899
3900 const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
3901 return getInstructionMapping(
3902 1, 1,
3903 getOperandsMapping({&ValMap}), 1);
3904 }

3905

3906

3907

3908

3909

3910 if (auto *PHI = dyn_cast<GPhi>(&MI)) {
3911 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3912 Register DstReg = PHI->getReg(0);
3913
3914 // Sometimes the result may have already been assigned a bank.
3915 if (const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI))
3916 ResultBank = DstBank->getID();

3917

3918 for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) {

3919 Register Reg = PHI->getIncomingValue(I);
3920 const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
3921

3922

3923 if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {

3924 ResultBank = AMDGPU::VGPRRegBankID;

3925 break;

3926 }

3927

3928

3929 unsigned OpBank = Bank->getID();

3930 ResultBank = regBankBoolUnion(ResultBank, OpBank);
3931 }

3932

3933 assert(ResultBank != AMDGPU::InvalidRegBankID);

3934

3935 unsigned Size = MRI.getType(DstReg).getSizeInBits();

3936

3937 const ValueMapping &ValMap =
3938 getValueMapping(0, Size, getRegBank(ResultBank));
3939 return getInstructionMapping(
3940 1, 1,
3941 getOperandsMapping({&ValMap}), 1);
3942 }

3943

3944 const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
3945 if (Mapping.isValid())
3946 return Mapping;

3947

3948 SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
3949

3950 switch (MI.getOpcode()) {

3951 default:

3952 return getInvalidInstructionMapping();
3953

3954 case AMDGPU::G_AND:

3955 case AMDGPU::G_OR:

3956 case AMDGPU::G_XOR:

3957 case AMDGPU::G_MUL: {

3958 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

3959 if (Size == 1) {

3960 const RegisterBank *DstBank
3961 = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
3962

3963 unsigned TargetBankID = AMDGPU::InvalidRegBankID;

3964 unsigned BankLHS = AMDGPU::InvalidRegBankID;

3965 unsigned BankRHS = AMDGPU::InvalidRegBankID;

3966 if (DstBank) {

3967 TargetBankID = DstBank->getID();

3968 if (DstBank == &AMDGPU::VCCRegBank) {

3969 TargetBankID = AMDGPU::VCCRegBankID;

3970 BankLHS = AMDGPU::VCCRegBankID;

3971 BankRHS = AMDGPU::VCCRegBankID;

3972 } else {

3973 BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
3974 AMDGPU::SGPRRegBankID);
3975 BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
3976 AMDGPU::SGPRRegBankID);

3977 }

3978 } else {

3979 BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
3980 AMDGPU::VCCRegBankID);
3981 BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
3982 AMDGPU::VCCRegBankID);

3983

3984

3985 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {

3986 TargetBankID = AMDGPU::VGPRRegBankID;

3987 } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {

3988 TargetBankID = AMDGPU::VCCRegBankID;

3989 BankLHS = AMDGPU::VCCRegBankID;

3990 BankRHS = AMDGPU::VCCRegBankID;

3991 } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {

3992 TargetBankID = AMDGPU::SGPRRegBankID;

3993 }

3994 }

3995

3996 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);

3997 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);

3998 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);

3999 break;

4000 }

4001

4002 if (Size == 64) {

4003

4004 if (isSALUMapping(MI)) {
4005 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);

4006 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];

4007 } else {

4008 if (MI.getOpcode() == AMDGPU::G_MUL && Subtarget.hasVectorMulU64())

4009 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4010 else

4011 OpdsMapping[0] =

4012 getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);

4013 unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI );

4014 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);

4015

4016 unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI );

4017 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);

4018 }

4019

4020 break;

4021 }

4022

4023 [[fallthrough]];

4024 }

4025 case AMDGPU::G_PTR_ADD:

4026 case AMDGPU::G_PTRMASK:

4027 case AMDGPU::G_ADD:

4028 case AMDGPU::G_SUB:

4029 case AMDGPU::G_SHL:

4030 case AMDGPU::G_LSHR:

4031 case AMDGPU::G_ASHR:

4032 case AMDGPU::G_UADDO:

4033 case AMDGPU::G_USUBO:

4034 case AMDGPU::G_UADDE:

4035 case AMDGPU::G_SADDE:

4036 case AMDGPU::G_USUBE:

4037 case AMDGPU::G_SSUBE:

4038 case AMDGPU::G_ABS:

4039 case AMDGPU::G_SHUFFLE_VECTOR:

4040 case AMDGPU::G_SBFX:

4041 case AMDGPU::G_UBFX:

4042 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:

4043 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:

4046 return getDefaultMappingSOP(MI);
4047 case AMDGPU::G_SMIN:

4048 case AMDGPU::G_SMAX:

4049 case AMDGPU::G_UMIN:

4050 case AMDGPU::G_UMAX:

4052

4053 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 64 &&

4057 }

4059 case AMDGPU::G_FADD:

4060 case AMDGPU::G_FSUB:

4061 case AMDGPU::G_FMUL:

4062 case AMDGPU::G_FMA:

4063 case AMDGPU::G_FFLOOR:

4064 case AMDGPU::G_FCEIL:

4065 case AMDGPU::G_INTRINSIC_ROUNDEVEN:

4066 case AMDGPU::G_FMINNUM:

4067 case AMDGPU::G_FMAXNUM:

4068 case AMDGPU::G_FMINIMUM:

4069 case AMDGPU::G_FMAXIMUM:

4070 case AMDGPU::G_FMINIMUMNUM:

4071 case AMDGPU::G_FMAXIMUMNUM:

4072 case AMDGPU::G_INTRINSIC_TRUNC:

4073 case AMDGPU::G_STRICT_FADD:

4074 case AMDGPU::G_STRICT_FSUB:

4075 case AMDGPU::G_STRICT_FMUL:

4076 case AMDGPU::G_STRICT_FMA: {

4077 LLT Ty = MRI.getType(MI.getOperand(0).getReg());

4078 unsigned Size = Ty.getSizeInBits();

4079 if (Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&

4080 (Size == 32 || Size == 16) && isSALUMapping(MI))
4081 return getDefaultMappingSOP(MI);
4082 return getDefaultMappingVOP(MI);
4083 }

4084 case AMDGPU::G_FPTOSI:

4085 case AMDGPU::G_FPTOUI:

4086 case AMDGPU::G_SITOFP:

4087 case AMDGPU::G_UITOFP: {

4088 unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4089 unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4090 if (Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&

4091 isSALUMapping(MI))
4092 return getDefaultMappingSOP(MI);
4093 return getDefaultMappingVOP(MI);
4094 }

4095 case AMDGPU::G_FPTRUNC:

4096 case AMDGPU::G_FPEXT: {

4097 unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4098 unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4099 if (Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&

4100 isSALUMapping(MI))
4101 return getDefaultMappingSOP(MI);
4102 return getDefaultMappingVOP(MI);
4103 }

4104 case AMDGPU::G_FSQRT:

4105 case AMDGPU::G_FEXP2:

4106 case AMDGPU::G_FLOG2: {

4107 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4108 if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&

4109 isSALUMapping(MI))
4110 return getDefaultMappingSOP(MI);
4111 return getDefaultMappingVOP(MI);
4112 }

4113 case AMDGPU::G_SADDSAT:

4114 case AMDGPU::G_SSUBSAT:

4115 case AMDGPU::G_UADDSAT:

4116 case AMDGPU::G_USUBSAT:

4117 case AMDGPU::G_FMAD:

4118 case AMDGPU::G_FLDEXP:

4119 case AMDGPU::G_FMINNUM_IEEE:

4120 case AMDGPU::G_FMAXNUM_IEEE:

4121 case AMDGPU::G_FCANONICALIZE:

4122 case AMDGPU::G_STRICT_FLDEXP:

4123 case AMDGPU::G_BSWAP:

4124 case AMDGPU::G_FSHR:

4125 case AMDGPU::G_AMDGPU_FMIN_LEGACY:

4126 case AMDGPU::G_AMDGPU_FMAX_LEGACY:

4127 case AMDGPU::G_AMDGPU_RCP_IFLAG:

4128 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:

4129 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:

4130 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:

4131 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:

4132 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:

4133 case AMDGPU::G_AMDGPU_SMED3:

4134 case AMDGPU::G_AMDGPU_FMED3:

4135 return getDefaultMappingVOP(MI);
4136 case AMDGPU::G_UMULH:

4137 case AMDGPU::G_SMULH: {

4138 if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
4139 return getDefaultMappingSOP(MI);
4140 return getDefaultMappingVOP(MI);
4141 }

4142 case AMDGPU::G_AMDGPU_MAD_U64_U32:

4143 case AMDGPU::G_AMDGPU_MAD_I64_I32: {

4144

4145

4146

4147

4148

4149

4150

4151

4152 bool AllSalu = true;

4153 bool MulSalu = true;

4154 for (unsigned i = 0; i < 5; ++i) {

4155 Register Reg = MI.getOperand(i).getReg();

4156 if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
4157 if (Bank->getID() != AMDGPU::SGPRRegBankID) {

4158 AllSalu = false;

4159 if (i == 2 || i == 3) {

4160 MulSalu = false;

4161 break;

4162 }

4163 }

4164 }

4165 }

4166

4167 if (AllSalu)

4168 return getDefaultMappingSOP(MI);
4169

4170

4171

4172

4173 if (!MulSalu || Subtarget.hasFullRate64Ops())

4174 return getDefaultMappingVOP(MI);
4175

4176

4177 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

4178 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

4179 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);

4180 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);

4181 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

4182 break;

4183 }
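// The split mapping above matches V_MAD_U64_U32's operand classes: the two
// 32-bit multiplicands (operands 2 and 3) stay in SGPRs when possible, while
// the 64-bit result, the carry-out, and the 64-bit addend go through
// VGPR/VCC, since only the multiply half has a scalar equivalent.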

4184 case AMDGPU::G_IMPLICIT_DEF: {

4185 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4186 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

4187 break;

4188 }

4189 case AMDGPU::G_FCONSTANT:

4190 case AMDGPU::G_CONSTANT:

4191 case AMDGPU::G_GLOBAL_VALUE:

4192 case AMDGPU::G_FRAME_INDEX:

4193 case AMDGPU::G_BLOCK_ADDR:

4194 case AMDGPU::G_READSTEADYCOUNTER:

4195 case AMDGPU::G_READCYCLECOUNTER: {

4196 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4197 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

4198 break;

4199 }

4200 case AMDGPU::G_DYN_STACKALLOC: {

4201

4202 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);

4203 unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);

4204 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);

4205 break;

4206 }

4207 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {

4208

4209

4210

4211

4212 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);

4213 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);

4214 break;

4215 }

4216 case AMDGPU::G_INSERT: {

4217 unsigned BankID = getMappingType(MRI, MI);
4218 unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
4219 unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
4220 unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
4221 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);

4222 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);

4223 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);

4224 OpdsMapping[3] = nullptr;

4225 break;

4226 }

4227 case AMDGPU::G_EXTRACT: {

4228 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4229 unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
4230 unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
4231 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);

4232 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);

4233 OpdsMapping[2] = nullptr;

4234 break;

4235 }

4236 case AMDGPU::G_BUILD_VECTOR:

4237 case AMDGPU::G_BUILD_VECTOR_TRUNC: {

4238 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

4239 if (DstTy == LLT::fixed_vector(2, 16)) {
4240 unsigned DstSize = DstTy.getSizeInBits();
4241 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4242 unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);

4243 unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI);

4244 unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);

4245

4246 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);

4247 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);

4248 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);

4249 break;

4250 }

4251

4252 [[fallthrough]];

4253 }

4254 case AMDGPU::G_MERGE_VALUES:

4255 case AMDGPU::G_CONCAT_VECTORS: {

4256 unsigned Bank = getMappingType(MRI, MI);
4257 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4258 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4259

4260 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);

4261

4262 for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)

4263 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);

4264 break;

4265 }

4266 case AMDGPU::G_BITREVERSE:

4267 case AMDGPU::G_BITCAST:

4268 case AMDGPU::G_INTTOPTR:

4269 case AMDGPU::G_PTRTOINT:

4270 case AMDGPU::G_FABS:

4271 case AMDGPU::G_FNEG: {

4272 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4273 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4274 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);

4275 break;

4276 }

4277 case AMDGPU::G_AMDGPU_FFBH_U32:

4278 case AMDGPU::G_AMDGPU_FFBL_B32:

4279 case AMDGPU::G_CTLZ_ZERO_UNDEF:

4280 case AMDGPU::G_CTTZ_ZERO_UNDEF: {

4281 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4282 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4283 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);

4284 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);

4285 break;

4286 }

4287 case AMDGPU::G_CTPOP: {

4288 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4289 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4290 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);

4291

4292

4293

4294

4295 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);

4296 break;

4297 }

4298 case AMDGPU::G_TRUNC: {

4299 Register Dst = MI.getOperand(0).getReg();

4300 Register Src = MI.getOperand(1).getReg();

4301 unsigned Bank = getRegBankID(Src, MRI);
4302 unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
4303 unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
4304 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);

4305 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);

4306 break;

4307 }

4308 case AMDGPU::G_ZEXT:

4309 case AMDGPU::G_SEXT:

4310 case AMDGPU::G_ANYEXT:

4311 case AMDGPU::G_SEXT_INREG: {

4312 Register Dst = MI.getOperand(0).getReg();

4313 Register Src = MI.getOperand(1).getReg();

4314 unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
4315 unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
4316
4317 unsigned DstBank;
4318 const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
4319 assert(SrcBank);
4320 switch (SrcBank->getID()) {

4321 case AMDGPU::SGPRRegBankID:

4322 DstBank = AMDGPU::SGPRRegBankID;

4323 break;

4324 default:

4325 DstBank = AMDGPU::VGPRRegBankID;

4326 break;

4327 }

4328

4329

4330

4331 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);

4332 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),

4333 SrcSize);

4334 break;

4335 }

4336 case AMDGPU::G_IS_FPCLASS: {

4337 Register SrcReg = MI.getOperand(1).getReg();

4338 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();

4339 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4340 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);

4341 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);

4342 break;

4343 }

4344 case AMDGPU::G_STORE: {

4345 assert(MI.getOperand(0).isReg());

4346 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4347

4348

4349

4350 const ValueMapping *ValMapping =
4351 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4352 OpdsMapping[0] = ValMapping;

4353 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
4354 break;

4355 }

4356 case AMDGPU::G_ICMP:

4357 case AMDGPU::G_FCMP: {

4358 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

4359

4360

4361

4362 unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI,
4363 AMDGPU::SGPRRegBankID);

4364 unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);

4365 unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);

4366

4367 auto canUseSCCICMP = [&]() {

4368 auto Pred =

4369 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4370 return Size == 32 ||

4371 (Size == 64 &&

4372 (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
4373 Subtarget.hasScalarCompareEq64());

4374 };

4375 auto canUseSCCFCMP = [&]() {

4376 return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);

4377 };

4378

4379 bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;

4380 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&

4381 Op2Bank == AMDGPU::SGPRRegBankID &&

4382 Op3Bank == AMDGPU::SGPRRegBankID &&

4383 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());

4384

4385 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;

4386 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

4387

4388

4389

4390 const unsigned ResultSize = 1;

4391

4392 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);

4393 OpdsMapping[1] = nullptr;

4394 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);

4395 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);

4396 break;

4397 }
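// Compare mapping in one sentence: a compare may produce an SCC (SGPR)
// result only when destination and both sources are uniform and the SALU can
// express it (32-bit integer compares, 64-bit eq/ne with scalar-compare
// support, or 16/32-bit float compares with SALU float instructions);
// otherwise the result lands in VCC with VGPR-mapped sources.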

4398 case AMDGPU::G_EXTRACT_VECTOR_ELT: {

4399

4400 unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);

4401 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4402 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4403 unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

4404 unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI);

4405 unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);

4406

4407 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);

4408 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);

4409

4410

4411 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);

4412 break;

4413 }

4414 case AMDGPU::G_INSERT_VECTOR_ELT: {

4415 unsigned OutputBankID = isSALUMapping(MI) ?
4416 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

4417

4418 unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4419 unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

4420 unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();

4421 unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), MRI);

4422 unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI);

4423

4424 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);

4425 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);

4426

4427

4428

4429 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {

4430 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,

4431 InsertSize);

4432 } else {

4433 assert(InsertSize == 32 || InsertSize == 64);

4434 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);

4435 }

4436

4437

4438 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);

4439 break;

4440 }

4441 case AMDGPU::G_UNMERGE_VALUES: {

4442 unsigned Bank = getMappingType(MRI, MI);
4443

4444

4445

4446 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

4447 unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
4448 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);

4449 }

4450 break;

4451 }

4452 case AMDGPU::G_AMDGPU_BUFFER_LOAD:

4453 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:

4454 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:

4455 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:

4456 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:

4457 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:

4458 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:

4459 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:

4460 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:

4461 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:

4462 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:

4463 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:

4464 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:

4465 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:

4466 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:

4467 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:

4468 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:

4469 case AMDGPU::G_AMDGPU_BUFFER_STORE:

4470 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:

4471 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:

4472 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:

4473 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {

4474 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
4475
4476 // rsrc
4477 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4478
4479 // vindex
4480 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4481
4482 // voffset
4483 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
4484
4485 // soffset
4486 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
4487
4488 // Any remaining operands are immediates and were correctly null
4489 // initialized.

4490 break;

4491 }

4492 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:

4493 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:

4494 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:

4495 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:

4496 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:

4497 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:

4498 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:

4499 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:

4500 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:

4501 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:

4502 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:

4503 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:

4504 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:

4505 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:

4506 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:

4507 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:

4508 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {

4509 // vdata_out
4510 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
4511
4512 // vdata_in
4513 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4514
4515 // rsrc
4516 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4517
4518 // vindex
4519 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
4520
4521 // voffset
4522 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
4523
4524 // soffset
4525 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
4526
4527 // Any remaining operands are immediates and were correctly null
4528 // initialized.

4529 break;

4530 }

4531 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {

4532 // vdata_out
4533 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
4534
4535 // vdata_in
4536 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4537
4538 // cmp
4539 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4540
4541 // rsrc
4542 OpdsMapping[3] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
4543
4544 // vindex
4545 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
4546
4547 // voffset
4548 OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
4549
4550 // soffset
4551 OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI);
4552
4553 // Any remaining operands are immediates and were correctly null
4554 // initialized.

4555 break;

4556 }

4557 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:

4558 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:

4559 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:

4560 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:

4561 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {

4562

4563

4564 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4565 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4566

4567

4568

4569 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();

4570 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();

4571 unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);

4572

4573 unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4574 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);

4575 break;

4576 }

4577 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:

4578 OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
4579 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4580 break;

4581 case AMDGPU::G_INTRINSIC:

4582 case AMDGPU::G_INTRINSIC_CONVERGENT: {

4583 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
4584 default:
4585 return getInvalidInstructionMapping();

4586 case Intrinsic::amdgcn_div_fmas:

4587 case Intrinsic::amdgcn_div_fixup:

4588 case Intrinsic::amdgcn_trig_preop:

4589 case Intrinsic::amdgcn_sin:

4590 case Intrinsic::amdgcn_cos:

4591 case Intrinsic::amdgcn_log_clamp:

4592 case Intrinsic::amdgcn_rcp_legacy:

4593 case Intrinsic::amdgcn_rsq_legacy:

4594 case Intrinsic::amdgcn_rsq_clamp:

4595 case Intrinsic::amdgcn_tanh:

4596 case Intrinsic::amdgcn_fmul_legacy:

4597 case Intrinsic::amdgcn_fma_legacy:

4598 case Intrinsic::amdgcn_frexp_mant:

4599 case Intrinsic::amdgcn_frexp_exp:

4600 case Intrinsic::amdgcn_fract:

4601 case Intrinsic::amdgcn_cvt_pknorm_i16:

4602 case Intrinsic::amdgcn_cvt_pknorm_u16:

4603 case Intrinsic::amdgcn_cvt_pk_i16:

4604 case Intrinsic::amdgcn_cvt_pk_u16:

4605 case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:

4606 case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:

4607 case Intrinsic::amdgcn_cvt_pk_f16_fp8:

4608 case Intrinsic::amdgcn_cvt_pk_f16_bf8:

4609 case Intrinsic::amdgcn_cvt_pk_fp8_f16:

4610 case Intrinsic::amdgcn_cvt_pk_bf8_f16:

4611 case Intrinsic::amdgcn_cvt_sr_fp8_f16:

4612 case Intrinsic::amdgcn_cvt_sr_bf8_f16:

4613 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8:

4614 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp8:

4615 case Intrinsic::amdgcn_cvt_scale_pk8_f16_bf8:

4616 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_bf8:

4617 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp4:

4618 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp4:

4619 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:

4620 case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:

4621 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:

4622 case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:

4623 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:

4624 case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:

4625 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:

4626 case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:

4627 case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:

4628 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:

4629 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:

4630 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:

4631 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:

4632 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:

4633 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:

4634 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:

4635 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:

4636 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:

4637 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:

4638 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:

4639 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:

4640 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:

4641 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:

4642 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:

4643 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:

4644 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:

4645 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:

4646 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:

4647 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:

4648 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:

4649 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:

4650 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:

4651 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:

4652 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:

4653 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:

4654 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:

4655 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:

4656 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:

4657 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:

4658 case Intrinsic::amdgcn_sat_pk4_i4_i8:

4659 case Intrinsic::amdgcn_sat_pk4_u4_u8:

4660 case Intrinsic::amdgcn_fmed3:

4661 case Intrinsic::amdgcn_cubeid:

4662 case Intrinsic::amdgcn_cubema:

4663 case Intrinsic::amdgcn_cubesc:

4664 case Intrinsic::amdgcn_cubetc:

4665 case Intrinsic::amdgcn_sffbh:

4666 case Intrinsic::amdgcn_fmad_ftz:

4667 case Intrinsic::amdgcn_mbcnt_lo:

4668 case Intrinsic::amdgcn_mbcnt_hi:

4669 case Intrinsic::amdgcn_mul_u24:

4670 case Intrinsic::amdgcn_mul_i24:

4671 case Intrinsic::amdgcn_mulhi_u24:

4672 case Intrinsic::amdgcn_mulhi_i24:

4673 case Intrinsic::amdgcn_lerp:

4674 case Intrinsic::amdgcn_sad_u8:

4675 case Intrinsic::amdgcn_msad_u8:

4676 case Intrinsic::amdgcn_sad_hi_u8:

4677 case Intrinsic::amdgcn_sad_u16:

4678 case Intrinsic::amdgcn_qsad_pk_u16_u8:

4679 case Intrinsic::amdgcn_mqsad_pk_u16_u8:

4680 case Intrinsic::amdgcn_mqsad_u32_u8:

4681 case Intrinsic::amdgcn_cvt_pk_u8_f32:

4682 case Intrinsic::amdgcn_alignbyte:

4683 case Intrinsic::amdgcn_perm:

4684 case Intrinsic::amdgcn_prng_b32:

4685 case Intrinsic::amdgcn_fdot2:

4686 case Intrinsic::amdgcn_sdot2:

4687 case Intrinsic::amdgcn_udot2:

4688 case Intrinsic::amdgcn_sdot4:

4689 case Intrinsic::amdgcn_udot4:

4690 case Intrinsic::amdgcn_sdot8:

4691 case Intrinsic::amdgcn_udot8:

4692 case Intrinsic::amdgcn_fdot2_bf16_bf16:

4693 case Intrinsic::amdgcn_fdot2_f16_f16:

4694 case Intrinsic::amdgcn_fdot2_f32_bf16:

4695 case Intrinsic::amdgcn_fdot2c_f32_bf16:

4696 case Intrinsic::amdgcn_sudot4:

4697 case Intrinsic::amdgcn_sudot8:

4698 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:

4699 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:

4700 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:

4701 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:

4702 case Intrinsic::amdgcn_cvt_f32_fp8:

4703 case Intrinsic::amdgcn_cvt_f32_fp8_e5m3:

4704 case Intrinsic::amdgcn_cvt_f32_bf8:

4705 case Intrinsic::amdgcn_cvt_off_f32_i4:

4706 case Intrinsic::amdgcn_cvt_pk_f32_fp8:

4707 case Intrinsic::amdgcn_cvt_pk_f32_bf8:

4708 case Intrinsic::amdgcn_cvt_pk_fp8_f32:

4709 case Intrinsic::amdgcn_cvt_pk_fp8_f32_e5m3:

4710 case Intrinsic::amdgcn_cvt_pk_bf8_f32:

4711 case Intrinsic::amdgcn_cvt_sr_fp8_f32:

4712 case Intrinsic::amdgcn_cvt_sr_fp8_f32_e5m3:

4713 case Intrinsic::amdgcn_cvt_sr_bf8_f32:

4714 case Intrinsic::amdgcn_cvt_sr_bf16_f32:

4715 case Intrinsic::amdgcn_cvt_sr_f16_f32:

4716 case Intrinsic::amdgcn_cvt_f16_fp8:

4717 case Intrinsic::amdgcn_cvt_f16_bf8:

4718 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:

4719 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:

4720 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:

4721 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:

4722 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:

4723 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:

4724 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:

4725 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:

4726 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:

4727 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:

4728 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:

4729 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:

4730 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:

4731 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:

4732 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:

4733 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:

4734 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:

4735 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:

4736 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:

4737 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:

4738 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:

4739 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:

4740 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:

4741 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:

4742 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:

4743 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:

4744 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:

4745 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:

4746 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:

4747 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:

4748 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:

4749 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:

4750 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:

4751 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:

4752 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:

4753 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:

4754 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:

4755 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:

4756 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:

4757 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:

4758 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:

4759 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:

4760 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:

4761 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:

4762 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:

4763 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:

4764 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:

4765 case Intrinsic::amdgcn_ashr_pk_i8_i32:

4766 case Intrinsic::amdgcn_ashr_pk_u8_i32:

4767 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:

4768 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:

4769 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:

4770 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:

4771 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:

4772 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:

4773 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:

4774 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:

4775 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:

4776 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:

4777 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:

4778 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:

4779 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:

4780 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:

4781 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:

4782 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:

4783 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:

4784 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:

4785 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:

4786 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:

4787 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:

4788 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:

4789 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:

4790 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:

4791 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:

4792 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:

4793 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:

4794 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:

4795 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:

4796 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:

4797 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:

4798 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:

4799 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8:

4800 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8:

4801 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8:

4802 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8:

4803 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8:

4804 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8:

4805 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8:

4806 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8:

4807 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8:

4808 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8:

4809 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8:

4810 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8:

4811 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8:

4812 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8:

4813 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8:

4814 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:

4815 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:

4816 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:

4817 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:

4818 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:

4819 case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:

4820 case Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4:

4821 case Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4:

4822 case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:

4823 case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:

4824 case Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16:

4825 case Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16:

4826 case Intrinsic::amdgcn_swmmac_f32_16x16x64_f16:

4827 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8:

4828 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8:

4829 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8:

4830 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8:

4831 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8:

4832 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8:

4833 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:

4834 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:

4835 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:

4836 case Intrinsic::amdgcn_perm_pk16_b4_u4:

4837 case Intrinsic::amdgcn_perm_pk16_b6_u4:

4838 case Intrinsic::amdgcn_perm_pk16_b8_u4:

4839 case Intrinsic::amdgcn_add_max_i32:

4840 case Intrinsic::amdgcn_add_max_u32:

4841 case Intrinsic::amdgcn_add_min_i32:

4842 case Intrinsic::amdgcn_add_min_u32:

4843 case Intrinsic::amdgcn_pk_add_max_i16:

4844 case Intrinsic::amdgcn_pk_add_max_u16:

4845 case Intrinsic::amdgcn_pk_add_min_i16:

4846 case Intrinsic::amdgcn_pk_add_min_u16:

4847 return getDefaultMappingVOP(MI);
4848 case Intrinsic::amdgcn_log:

4849 case Intrinsic::amdgcn_exp2:

4850 case Intrinsic::amdgcn_rcp:

4851 case Intrinsic::amdgcn_rsq:

4852 case Intrinsic::amdgcn_sqrt: {

4853 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4854 if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&

4855 isSALUMapping(MI))
4856 return getDefaultMappingSOP(MI);
4857 return getDefaultMappingVOP(MI);
4858 }

4859 case Intrinsic::amdgcn_sbfe:

4860 case Intrinsic::amdgcn_ubfe:

4861 if (isSALUMapping(MI))
4862 return getDefaultMappingSOP(MI);
4863 return getDefaultMappingVOP(MI);
4864 case Intrinsic::amdgcn_ds_swizzle:

4865 case Intrinsic::amdgcn_ds_permute:

4866 case Intrinsic::amdgcn_ds_bpermute:

4867 case Intrinsic::amdgcn_update_dpp:

4868 case Intrinsic::amdgcn_mov_dpp8:

4869 case Intrinsic::amdgcn_mov_dpp:

4870 case Intrinsic::amdgcn_strict_wwm:

4871 case Intrinsic::amdgcn_wwm:

4872 case Intrinsic::amdgcn_strict_wqm:

4873 case Intrinsic::amdgcn_wqm:

4874 case Intrinsic::amdgcn_softwqm:

4875 case Intrinsic::amdgcn_set_inactive:

4876 case Intrinsic::amdgcn_set_inactive_chain_arg:

4877 case Intrinsic::amdgcn_permlane64:

4878 case Intrinsic::amdgcn_ds_bpermute_fi_b32:

4879 return getDefaultMappingVOP(MI);
4880 case Intrinsic::amdgcn_cvt_pkrtz:
4881 if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
4882 return getDefaultMappingSOP(MI);
4883 return getDefaultMappingVOP(MI);

4884 case Intrinsic::amdgcn_kernarg_segment_ptr:

4885 case Intrinsic::amdgcn_s_getpc:

4886 case Intrinsic::amdgcn_groupstaticsize:

4887 case Intrinsic::amdgcn_reloc_constant:

4888 case Intrinsic::returnaddress: {

4889 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4890 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

4891 break;

4892 }

4893 case Intrinsic::amdgcn_wqm_vote: {

4894 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4895 OpdsMapping[0] = OpdsMapping[2]

4896 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);

4897 break;

4898 }

4899 case Intrinsic::amdgcn_ps_live: {

4900 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

4901 break;

4902 }

4903 case Intrinsic::amdgcn_div_scale: {

4904 unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4905 unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

4906 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);

4907 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);

4908

4909 unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();

4910 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);

4911 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);

4912 break;

4913 }

4914 case Intrinsic::amdgcn_class: {

4915 Register Src0Reg = MI.getOperand(2).getReg();

4916 Register Src1Reg = MI.getOperand(3).getReg();

4917 unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();

4918 unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();

4919 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4920 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);

4921 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);

4922 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);

4923 break;

4924 }

4925 case Intrinsic::amdgcn_icmp:

4926 case Intrinsic::amdgcn_fcmp: {

4927 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4928

4929 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);

4930 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

4931 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);

4932 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);

4933 break;

4934 }

4935 case Intrinsic::amdgcn_readlane: {

4936

4937 Register IdxReg = MI.getOperand(3).getReg();

4938 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();

4939 unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);

4940 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);

4941 [[fallthrough]];

4942 }

4943 case Intrinsic::amdgcn_readfirstlane: {

4944 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4945 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

4946 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);

4947 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);

4948 break;

4949 }

4950 case Intrinsic::amdgcn_writelane: {

4951 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4952 Register SrcReg = MI.getOperand(2).getReg();

4953 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();

4954 unsigned SrcBank = getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);

4955 Register IdxReg = MI.getOperand(3).getReg();

4956 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();

4957 unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);

4958 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

4959

4960

4961

4962 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);

4963 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);

4964 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);

4965 break;

4966 }

4967 case Intrinsic::amdgcn_if_break: {

4968 unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
4969 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

4970 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

4971 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

4972 break;

4973 }

4974 case Intrinsic::amdgcn_permlane16:

4975 case Intrinsic::amdgcn_permlanex16: {

4976 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4977 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4978 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4979 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4980 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

4981 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);

4982 break;

4983 }

4984 case Intrinsic::amdgcn_permlane_bcast:

4985 case Intrinsic::amdgcn_permlane_up:

4986 case Intrinsic::amdgcn_permlane_down:

4987 case Intrinsic::amdgcn_permlane_xor: {

4988 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4989 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4990 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4993 break;

4994 }

4995 case Intrinsic::amdgcn_permlane_idx_gen: {

4996 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

4997 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

4998 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5000 break;

5001 }

5002 case Intrinsic::amdgcn_permlane16_var:

5003 case Intrinsic::amdgcn_permlanex16_var: {

5004 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5005 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5006 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5007 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5008 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5009 break;

5010 }

5011 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:

5012 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:

5013 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:

5014 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:

5015 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:

5016 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:

5017 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:

5018 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:

5019 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:

5020 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:

5021 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:

5022 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:

5023 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:

5024 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:

5025 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:

5026 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:

5027 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:

5028 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:

5029 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:

5030 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:

5031 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:

5032 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:

5033 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:

5034 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:

5035 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:

5036 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:

5037 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:

5038 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:

5039 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:

5040 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:

5041 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:

5042 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:

5043 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:

5044 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:

5045 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:

5046 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:

5047 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:

5048 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:

5049 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:

5050 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:

5051 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:

5052 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:

5053 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:

5054 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {

5055 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5056 unsigned MinNumRegsRequired = DstSize / 32;

5057 // Default for MAI intrinsics.

5058 // srcC can also be an immediate which can be folded later.

5059 // FIXME: Should we eventually add an alternative mapping with AGPR src

5060 // for srcA/srcB?

5061 //

5062 // vdst, srcA, srcB, srcC

5063

5064 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

5065

5066 bool UseAGPRForm = !Subtarget.hasGFX90AInsts() ||

5067 Info->selectAGPRFormMFMA(MinNumRegsRequired);

5068

5069 OpdsMapping[0] =

5070 UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)

5071 : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5072 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5073 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5074 OpdsMapping[4] =

5075 UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)

5076 : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5077 break;

5078 }
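// Note (annotation, not part of the LLVM source): the accumulator operands
// are the interesting part of the MFMA mapping. On gfx908 only the AGPR form
// of MFMA exists, so vdst/srcC must live in AGPRs; from gfx90a on both AGPR
// and VGPR forms exist and selectAGPRFormMFMA() decides based on how many
// registers the result tile needs. For example, a 32x32x1f32 MFMA producing
// 32 f32 lanes per thread needs MinNumRegsRequired = 1024 / 32 = 32 registers.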

5079 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:

5080 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {

5081 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5082 unsigned MinNumRegsRequired = DstSize / 32;

5083

5086

5087 OpdsMapping[0] =

5090

5093 OpdsMapping[4] =

5096

5099 break;

5100 }

5101 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

5102 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:

5103 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:

5104 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:

5105 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:

5106 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:

5107 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:

5108 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:

5109 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:

5110 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:

5111 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:

5112 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:

5113 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:

5114 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:

5115 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:

5116 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:

5117 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:

5118 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:

5119 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:

5120 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:

5121 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:

5122 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:

5123 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:

5124 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:

5125 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:

5126 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:

5127 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:

5128 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {

5129 Register DstReg = MI.getOperand(0).getReg();

5130 unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

5131 unsigned MinNumRegsRequired = DstSize / 32;

5134

5135

5138

5141 OpdsMapping[4] =

5145 break;

5146 }

5147 case Intrinsic::amdgcn_interp_p1:

5148 case Intrinsic::amdgcn_interp_p2:

5149 case Intrinsic::amdgcn_interp_mov:

5150 case Intrinsic::amdgcn_interp_p1_f16:

5151 case Intrinsic::amdgcn_interp_p2_f16:

5152 case Intrinsic::amdgcn_lds_param_load: {

5153 const int M0Idx = MI.getNumOperands() - 1;

5154 Register M0Reg = MI.getOperand(M0Idx).getReg();

5155 unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);

5156 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5157

5158 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5159 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)

5160 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5161

5162

5163

5164 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);

5165 break;

5166 }

5167 case Intrinsic::amdgcn_interp_inreg_p10:

5168 case Intrinsic::amdgcn_interp_inreg_p2:

5169 case Intrinsic::amdgcn_interp_inreg_p10_f16:

5170 case Intrinsic::amdgcn_interp_inreg_p2_f16:

5171 case Intrinsic::amdgcn_interp_p10_rtz_f16:

5172 case Intrinsic::amdgcn_interp_p2_rtz_f16: {

5173 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5174 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5175 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5176 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5177 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5178 break;

5179 }

5180 case Intrinsic::amdgcn_permlane16_swap:

5181 case Intrinsic::amdgcn_permlane32_swap: {

5182 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5183 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =

5184 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5185 break;

5186 }

5187 case Intrinsic::amdgcn_ballot: {

5188 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5189 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

5190 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);

5191 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);

5192 break;

5193 }
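// Note (annotation, not part of the LLVM source): ballot converts a per-lane
// i1 (VCC bank) into a wave-wide mask held in scalar registers, which is why
// source and destination deliberately sit in different banks. In IR:
//
//   %mask = call i64 @llvm.amdgcn.ballot.i64(i1 %cond) ; wave64 mask in SGPRs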

5194 case Intrinsic::amdgcn_inverse_ballot: {

5195 // This must be an SGPR, but accept a VGPR.

5196 Register MaskReg = MI.getOperand(2).getReg();

5197 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();

5198 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);

5199 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

5200 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);

5201 break;

5202 }

5203 case Intrinsic::amdgcn_bitop3: {

5204 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5205 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5206 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5207 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5208 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5209 break;

5210 }

5211 case Intrinsic::amdgcn_s_quadmask:

5212 case Intrinsic::amdgcn_s_wqm: {

5213 Register MaskReg = MI.getOperand(2).getReg();

5214 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();

5215 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);

5216 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);

5217 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);

5218 break;

5219 }

5220 case Intrinsic::amdgcn_wave_reduce_add:

5221 case Intrinsic::amdgcn_wave_reduce_fadd:

5222 case Intrinsic::amdgcn_wave_reduce_sub:

5223 case Intrinsic::amdgcn_wave_reduce_fsub:

5224 case Intrinsic::amdgcn_wave_reduce_min:

5225 case Intrinsic::amdgcn_wave_reduce_umin:

5226 case Intrinsic::amdgcn_wave_reduce_fmin:

5227 case Intrinsic::amdgcn_wave_reduce_max:

5228 case Intrinsic::amdgcn_wave_reduce_umax:

5229 case Intrinsic::amdgcn_wave_reduce_fmax:

5230 case Intrinsic::amdgcn_wave_reduce_and:

5231 case Intrinsic::amdgcn_wave_reduce_or:

5232 case Intrinsic::amdgcn_wave_reduce_xor: {

5233 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5234 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);

5235 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

5236 auto regBankID =

5237 isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

5238 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);

5239 break;

5240 }
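// Note (annotation, not part of the LLVM source): a wave reduction always
// yields a wave-uniform result, hence the unconditional SGPR destination.
// The source only stays scalar when the whole instruction is SALU-mappable
// (isSALUMapping); otherwise it is mapped to a VGPR and the reduction is
// expanded later over the active lanes.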

5241 case Intrinsic::amdgcn_s_bitreplicate:

5242 Register MaskReg = MI.getOperand(2).getReg();

5243 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);

5244 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);

5245 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);

5246 }

5247 break;

5248 }

5249 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:

5250 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:

5251 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:

5252 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:

5253 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {

5254 auto IntrID = AMDGPU::getIntrinsicID(MI);

5255 const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID);

5256 assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");

5257 // Non-images can have complications from operands that allow both SGPR

5258 // and VGPR. For now it's too complicated to figure out the final opcode

5259 // to derive the register bank from the MCInstrDesc.

5260 assert(RSrcIntrin->IsImage);

5261 return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);

5262 }

5263 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:

5264 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:

5265 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {

5266 bool IsDualOrBVH8 =

5267 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||

5268 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;

5269 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;

5270 unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;

5271 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5272 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5273 if (IsDualOrBVH8) {

5274 OpdsMapping[1] = AMDGPU::getValueMapping(

5275 AMDGPU::VGPRRegBankID,

5276 MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());

5277 OpdsMapping[2] = AMDGPU::getValueMapping(

5278 AMDGPU::VGPRRegBankID,

5279 MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());

5280 }

5281 OpdsMapping[LastRegOpIdx] =

5282 getSGPROpMapping(MI.getOperand(LastRegOpIdx).getReg(), MRI, *TRI);

5283 if (LastRegOpIdx == 3) {

5284

5285 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

5286 if (Size > 256)

5288 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5289 } else {

5290

5291 unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;

5292 for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {

5293 unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();

5294 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);

5295 }

5296 }

5297 break;

5298 }

5299 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:

5300 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {

5301 auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();

5302 switch (IntrID) {

5303 case Intrinsic::amdgcn_s_getreg:

5304 case Intrinsic::amdgcn_s_memtime:

5305 case Intrinsic::amdgcn_s_memrealtime:

5306 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:

5307 case Intrinsic::amdgcn_s_sendmsg_rtn: {

5308 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5309 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

5310 break;

5311 }

5312 case Intrinsic::amdgcn_global_atomic_csub:

5313 case Intrinsic::amdgcn_global_atomic_fmin_num:

5314 case Intrinsic::amdgcn_global_atomic_fmax_num:

5315 case Intrinsic::amdgcn_flat_atomic_fmin_num:

5316 case Intrinsic::amdgcn_flat_atomic_fmax_num:

5317 case Intrinsic::amdgcn_atomic_cond_sub_u32:

5318 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:

5319 case Intrinsic::amdgcn_global_load_tr_b64:

5320 case Intrinsic::amdgcn_global_load_tr_b128:

5321 case Intrinsic::amdgcn_global_load_tr4_b64:

5322 case Intrinsic::amdgcn_global_load_tr6_b96:

5323 case Intrinsic::amdgcn_ds_load_tr8_b64:

5324 case Intrinsic::amdgcn_ds_load_tr16_b128:

5325 case Intrinsic::amdgcn_ds_load_tr4_b64:

5326 case Intrinsic::amdgcn_ds_load_tr6_b96:

5327 case Intrinsic::amdgcn_flat_load_monitor_b32:

5328 case Intrinsic::amdgcn_flat_load_monitor_b64:

5329 case Intrinsic::amdgcn_flat_load_monitor_b128:

5330 case Intrinsic::amdgcn_global_load_monitor_b32:

5331 case Intrinsic::amdgcn_global_load_monitor_b64:

5332 case Intrinsic::amdgcn_global_load_monitor_b128:

5333 case Intrinsic::amdgcn_ds_read_tr4_b64:

5334 case Intrinsic::amdgcn_ds_read_tr6_b96:

5335 case Intrinsic::amdgcn_ds_read_tr8_b64:

5336 case Intrinsic::amdgcn_ds_read_tr16_b64:

5337 case Intrinsic::amdgcn_ds_atomic_async_barrier_arrive_b64:

5338 case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:

5339 return getDefaultMappingAllVGPR(MI);

5340 case Intrinsic::amdgcn_ds_ordered_add:

5341 case Intrinsic::amdgcn_ds_ordered_swap: {

5342 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5343 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5344 unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

5345 AMDGPU::SGPRRegBankID);

5346 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);

5347 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5348 break;

5349 }

5350 case Intrinsic::amdgcn_ds_append:

5351 case Intrinsic::amdgcn_ds_consume: {

5352 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5353 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5354 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5355 break;

5356 }

5357 case Intrinsic::amdgcn_exp_compr:

5358 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5359 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5360 break;

5361 case Intrinsic::amdgcn_exp:

5362

5363 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5364 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5365 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5366 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5367 break;

5368 case Intrinsic::amdgcn_exp_row:

5369 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5370 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5371 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5372 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5373 OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);

5374 break;

5375 case Intrinsic::amdgcn_s_sendmsg:

5376 case Intrinsic::amdgcn_s_sendmsghalt: {

5377 // This must be an SGPR, but accept a VGPR.

5378 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

5379 AMDGPU::SGPRRegBankID);

5380 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);

5381 break;

5382 }

5383 case Intrinsic::amdgcn_s_setreg: {

5384 // This must be an SGPR, but accept a VGPR.

5385 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

5386 AMDGPU::SGPRRegBankID);

5387 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);

5388 break;

5389 }

5390 case Intrinsic::amdgcn_s_ttracedata: {

5391 // This must be an SGPR, but accept a VGPR.

5392 unsigned Bank =

5393 getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);

5394 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);

5395 break;

5396 }

5397 case Intrinsic::amdgcn_end_cf: {

5398 unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

5399 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

5400 break;

5401 }

5402 case Intrinsic::amdgcn_else: {

5403 unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

5404 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

5405 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);

5406 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);

5407 break;

5408 }

5409 case Intrinsic::amdgcn_init_whole_wave:

5410 case Intrinsic::amdgcn_live_mask: {

5411 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

5412 break;

5413 }

5414 case Intrinsic::amdgcn_wqm_demote:

5415 case Intrinsic::amdgcn_kill: {

5416 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

5417 break;

5418 }

5419 case Intrinsic::amdgcn_raw_buffer_load:

5420 case Intrinsic::amdgcn_raw_ptr_buffer_load:

5421 case Intrinsic::amdgcn_raw_atomic_buffer_load:

5422 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:

5423 case Intrinsic::amdgcn_raw_tbuffer_load:

5424 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {

5425 // FIXME: Should make intrinsic ID the last operand of the instruction,

5426 // then this would be the same as store

5427 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5428 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5429 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5430 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5431 break;

5432 }

5433 case Intrinsic::amdgcn_raw_buffer_load_lds:

5434 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {

5439 break;

5440 }

5441 case Intrinsic::amdgcn_raw_buffer_store:

5442 case Intrinsic::amdgcn_raw_ptr_buffer_store:

5443 case Intrinsic::amdgcn_raw_buffer_store_format:

5444 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:

5445 case Intrinsic::amdgcn_raw_tbuffer_store:

5446 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {

5447 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5448 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5449 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5450 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5451 break;

5452 }

5453 case Intrinsic::amdgcn_struct_buffer_load:

5454 case Intrinsic::amdgcn_struct_ptr_buffer_load:

5455 case Intrinsic::amdgcn_struct_tbuffer_load:

5456 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:

5457 case Intrinsic::amdgcn_struct_atomic_buffer_load:

5458 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {

5459 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5460 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5461 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5462 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5463 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);

5464 break;

5465 }

5466 case Intrinsic::amdgcn_struct_buffer_load_lds:

5467 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {

5473 break;

5474 }

5475 case Intrinsic::amdgcn_struct_buffer_store:

5476 case Intrinsic::amdgcn_struct_ptr_buffer_store:

5477 case Intrinsic::amdgcn_struct_tbuffer_store:

5478 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {

5479 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5480 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5481 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5482 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5483 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);

5484 break;

5485 }

5486 case Intrinsic::amdgcn_init_exec_from_input: {

5487 unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

5488 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

5489 break;

5490 }

5491 case Intrinsic::amdgcn_ds_gws_init:

5492 case Intrinsic::amdgcn_ds_gws_barrier:

5493 case Intrinsic::amdgcn_ds_gws_sema_br: {

5494 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5495

5496 // This must be an SGPR, but accept a VGPR.

5497 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

5498 AMDGPU::SGPRRegBankID);

5499 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);

5500 break;

5501 }

5502 case Intrinsic::amdgcn_ds_gws_sema_v:

5503 case Intrinsic::amdgcn_ds_gws_sema_p:

5504 case Intrinsic::amdgcn_ds_gws_sema_release_all: {

5505 // This must be an SGPR, but accept a VGPR.

5506 unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI,

5507 AMDGPU::SGPRRegBankID);

5508 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);

5509 break;

5510 }

5511 case Intrinsic::amdgcn_cluster_load_b32:

5512 case Intrinsic::amdgcn_cluster_load_b64:

5513 case Intrinsic::amdgcn_cluster_load_b128: {

5516 unsigned M0Bank =

5517 getRegBankID(MI.getOperand(4).getReg(), MRI, AMDGPU::SGPRRegBankID);

5518 OpdsMapping[4] = AMDGPU::getValueMapping(M0Bank, 32);

5519 break;

5520 }

5521 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:

5522 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:

5523 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:

5524 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {

5527 unsigned M0Bank =

5528 getRegBankID(MI.getOperand(5).getReg(), MRI, AMDGPU::SGPRRegBankID);

5529 OpdsMapping[5] = AMDGPU::getValueMapping(M0Bank, 32);

5530 break;

5531 }

5532 case Intrinsic::amdgcn_global_store_async_from_lds_b8:

5533 case Intrinsic::amdgcn_global_store_async_from_lds_b32:

5534 case Intrinsic::amdgcn_global_store_async_from_lds_b64:

5535 case Intrinsic::amdgcn_global_store_async_from_lds_b128:

5536 case Intrinsic::amdgcn_global_load_async_to_lds_b8:

5537 case Intrinsic::amdgcn_global_load_async_to_lds_b32:

5538 case Intrinsic::amdgcn_global_load_async_to_lds_b64:

5539 case Intrinsic::amdgcn_global_load_async_to_lds_b128:

5540 case Intrinsic::amdgcn_load_to_lds:

5541 case Intrinsic::amdgcn_global_load_lds: {

5542 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5543 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5544 break;

5545 }

5546 case Intrinsic::amdgcn_lds_direct_load: {

5547 const int M0Idx = MI.getNumOperands() - 1;

5548 Register M0Reg = MI.getOperand(M0Idx).getReg();

5549 unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);

5550 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5551

5552 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

5553 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)

5554 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

5555

5556

5557

5558 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);

5559 break;

5560 }

5561 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:

5562 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:

5563 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5564 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5565 break;

5566 case Intrinsic::amdgcn_ds_bvh_stack_rtn:

5567 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:

5568 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:

5569 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {

5570 OpdsMapping[0] =

5571 getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5572 OpdsMapping[1] =

5573 getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5574 OpdsMapping[3] =

5575 getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5576 OpdsMapping[4] =

5577 getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

5578 OpdsMapping[5] =

5579 getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);

5580 break;

5581 }

5582 case Intrinsic::amdgcn_s_sleep_var:

5583 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5584 break;

5585 case Intrinsic::amdgcn_s_barrier_join:

5586 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5587 break;

5588 case Intrinsic::amdgcn_s_barrier_init:

5589 case Intrinsic::amdgcn_s_barrier_signal_var:

5590 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5591 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5592 break;

5593 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {

5594 const unsigned ResultSize = 1;

5595 OpdsMapping[0] =

5596 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);

5597 break;

5598 }

5599 case Intrinsic::amdgcn_s_get_barrier_state:

5600 case Intrinsic::amdgcn_s_get_named_barrier_state: {

5601 OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5602 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5603 break;

5604 }

5605 case Intrinsic::amdgcn_pops_exiting_wave_id:

5606 return getDefaultMappingSOP(MI);

5607 case Intrinsic::amdgcn_tensor_load_to_lds_d2:

5608 case Intrinsic::amdgcn_tensor_store_from_lds_d2:

5609 case Intrinsic::amdgcn_tensor_load_to_lds:

5610 case Intrinsic::amdgcn_tensor_store_from_lds: {

5611 // Lie and claim everything is legal, even though some need to be

5612 // SGPRs. applyMapping will have to deal with it as a waterfall loop.

5613 for (unsigned I = 1; I < MI.getNumOperands(); ++I) {

5614 if (MI.getOperand(I).isReg()) {

5615 Register Reg = MI.getOperand(I).getReg();

5616 auto OpBank = getRegBankID(Reg, MRI);

5617 unsigned Size = getSizeInBits(Reg, MRI, *TRI);

5618 OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);

5619 }

5620 }

5621 break;

5622 }

5623 case Intrinsic::amdgcn_s_prefetch_data: {

5624 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

5625 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5626 break;

5627 }

5628 case Intrinsic::amdgcn_flat_prefetch:

5629 case Intrinsic::amdgcn_global_prefetch:

5630 return getDefaultMappingVOP(MI);

5631 default:

5632 return getInvalidInstructionMapping();

5633 }

5634 break;

5635 }

5636 case AMDGPU::G_SELECT: {

5637 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

5638 unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,

5639 AMDGPU::SGPRRegBankID);

5640 unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI,

5641 AMDGPU::SGPRRegBankID);

5642 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&

5643 Op3Bank == AMDGPU::SGPRRegBankID;

5644

5645 unsigned CondBankDefault = SGPRSrcs ?

5646 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;

5647 unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI,

5648 CondBankDefault);

5649 if (CondBank == AMDGPU::SGPRRegBankID)

5650 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;

5651 else if (CondBank == AMDGPU::VGPRRegBankID)

5652 CondBank = AMDGPU::VCCRegBankID;

5653

5654 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?

5655 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

5656

5657 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);

5658

5659

5660 if (Size == 64) {

5661 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);

5662 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);

5663 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);

5664 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);

5665 } else {

5666 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);

5667 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);

5668 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);

5669 OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);

5670 }

5671

5672 break;

5673 }
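// Note (annotation, not part of the LLVM source): this mapping encodes the
// two shapes of select. If both values and the condition are uniform, the
// whole operation stays on the SALU (s_cselect, condition in the SGPR bank);
// otherwise the condition is treated as a lane mask (VCC bank) and the values
// move to VGPRs, matching v_cndmask. 64-bit scalar selects use the
// SGPR64-only value mapping so they can later be split into 32-bit halves.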

5674

5675 case AMDGPU::G_SI_CALL: {

5676 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);

5677 // Lie and claim everything is legal, even though some need to be

5678 // SGPRs. applyMapping will have to deal with it as a waterfall loop.

5679 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);

5680

5681 // Allow anything for implicit arguments

5682 for (unsigned I = 4; I < MI.getNumOperands(); ++I) {

5683 if (MI.getOperand(I).isReg()) {

5684 Register Reg = MI.getOperand(I).getReg();

5685 auto OpBank = getRegBankID(Reg, MRI);

5686 unsigned Size = getSizeInBits(Reg, MRI, *TRI);

5687 OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);

5688 }

5689 }

5690 break;

5691 }

5692 case AMDGPU::G_LOAD:

5693 case AMDGPU::G_ZEXTLOAD:

5694 case AMDGPU::G_SEXTLOAD:

5695 return getInstrMappingForLoad(MI);

5696

5697 case AMDGPU::G_ATOMICRMW_XCHG:

5698 case AMDGPU::G_ATOMICRMW_ADD:

5699 case AMDGPU::G_ATOMICRMW_SUB:

5700 case AMDGPU::G_ATOMICRMW_AND:

5701 case AMDGPU::G_ATOMICRMW_OR:

5702 case AMDGPU::G_ATOMICRMW_XOR:

5703 case AMDGPU::G_ATOMICRMW_MAX:

5704 case AMDGPU::G_ATOMICRMW_MIN:

5705 case AMDGPU::G_ATOMICRMW_UMAX:

5706 case AMDGPU::G_ATOMICRMW_UMIN:

5707 case AMDGPU::G_ATOMICRMW_FADD:

5708 case AMDGPU::G_ATOMICRMW_FMIN:

5709 case AMDGPU::G_ATOMICRMW_FMAX:

5710 case AMDGPU::G_ATOMICRMW_UINC_WRAP:

5711 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:

5712 case AMDGPU::G_ATOMICRMW_USUB_COND:

5713 case AMDGPU::G_ATOMICRMW_USUB_SAT:

5714 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {

5715 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5716 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());

5717 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5718 break;

5719 }

5720 case AMDGPU::G_ATOMIC_CMPXCHG: {

5721 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5722 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());

5723 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

5724 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

5725 break;

5726 }
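// Note (annotation, not part of the LLVM source): atomics are memory
// operations, so the data and result operands are forced to VGPRs; only the
// pointer keeps whatever bank it already has (getValueMappingForPtr), since a
// uniform base address can stay scalar until selection decides the addressing
// mode.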

5727 case AMDGPU::G_BRCOND: {

5728 unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI,

5729 AMDGPU::SGPRRegBankID);

5730 assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

5731 if (Bank != AMDGPU::SGPRRegBankID)

5732 Bank = AMDGPU::VCCRegBankID;

5733

5734 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);

5735 break;

5736 }
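// Note (annotation, not part of the LLVM source): a condition already in the
// scalar (SCC) bank keeps the branch uniform; anything else is normalized to
// the VCC bank, i.e. a divergent branch on a lane mask that later control-flow
// lowering turns into exec-mask manipulation.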

5737 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:

5738 return getDefaultMappingVOP(MI);

5739 case AMDGPU::G_PREFETCH:

5740 OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

5741 break;

5742 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP:

5743 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN:

5744 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);

5745 break;

5746 }

5747

5748 return getInstructionMapping(/*ID*/1, /*Cost*/1,

5749 getOperandsMapping(OpdsMapping),

5750 MI.getNumOperands());

5751}
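For orientation, here is a minimal sketch (not taken from the file) of the pattern getInstrMapping() applies case by case above: fill one ValueMapping per MachineInstr operand, then wrap the array in a uniquely allocated InstructionMapping. The helper name mapAllVGPR and its existence are hypothetical; the calls themselves are the RegisterBankInfo APIs documented below.

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::mapAllVGPR(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &Op = MI.getOperand(I);
    if (!Op.isReg()) // non-register operands keep a null ValueMapping
      continue;
    unsigned Size = MRI.getType(Op.getReg()).getSizeInBits();
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }
  // ID and Cost are uniquing/cost-model inputs; 1 is the conventional default.
  return getInstructionMapping(/*ID=*/1, /*Cost=*/1,
                               getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}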

Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.

AMDGPU Register Bank Select

static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)

Definition AMDGPURegisterBankInfo.cpp:1758

static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)

Definition AMDGPURegisterBankInfo.cpp:3574

static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)

Definition AMDGPURegisterBankInfo.cpp:1797

static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)

Definition AMDGPURegisterBankInfo.cpp:2020

static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)

Definition AMDGPURegisterBankInfo.cpp:1736

static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)

Implement extending a 32-bit value to a 64-bit value.

Definition AMDGPURegisterBankInfo.cpp:1912

static unsigned getExtendOp(unsigned Opc)

Definition AMDGPURegisterBankInfo.cpp:1718

static bool isVectorRegisterBank(const RegisterBank &Bank)

Definition AMDGPURegisterBankInfo.cpp:222

static unsigned regBankUnion(unsigned RB0, unsigned RB1)

Definition AMDGPURegisterBankInfo.cpp:3559

static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)

Split Ty into 2 pieces.

Definition AMDGPURegisterBankInfo.cpp:1016

static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)

Replace the current type each register in Regs has with NewTy.

Definition AMDGPURegisterBankInfo.cpp:680

static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)

Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.

Definition AMDGPURegisterBankInfo.cpp:1891

static LLT widen96To128(LLT Ty)

Definition AMDGPURegisterBankInfo.cpp:1032

static LLT getHalfSizedType(LLT Ty)

Definition AMDGPURegisterBankInfo.cpp:688

static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc)

Definition AMDGPURegisterBankInfo.cpp:1326

This file declares the targeting of the RegisterBankInfo class for AMDGPU.

AMD GCN specific subclass of TargetSubtarget.

Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

Contains matchers for matching SSA Machine Instructions.

This file declares the MachineIRBuilder class.

Interface definition for SIRegisterInfo.

bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:1159

std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const

Definition AMDGPURegisterBankInfo.cpp:1811

bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const

Definition AMDGPURegisterBankInfo.cpp:972

const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const

Definition AMDGPURegisterBankInfo.cpp:3685

InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const

unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override

Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.

Definition AMDGPURegisterBankInfo.cpp:231

RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const

Definition AMDGPURegisterBankInfo.cpp:384

bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const

Definition AMDGPURegisterBankInfo.cpp:1856

bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const

Legalize instruction MI where operands in OpIndices must be SGPRs.

Definition AMDGPURegisterBankInfo.cpp:773

const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override

Get a register bank that covers RC.

Definition AMDGPURegisterBankInfo.cpp:288

AMDGPURegisterBankInfo(const GCNSubtarget &STI)

Definition AMDGPURegisterBankInfo.cpp:205

bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const

Definition AMDGPURegisterBankInfo.cpp:1568

unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const

Definition AMDGPURegisterBankInfo.cpp:3791

Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const

Handle register layout difference for f16 images for some subtargets.

Definition AMDGPURegisterBankInfo.cpp:1771

const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3744

void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override

See RegisterBankInfo::applyMapping.

Definition AMDGPURegisterBankInfo.cpp:2191

bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const

Definition AMDGPURegisterBankInfo.cpp:1462

bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const

Definition AMDGPURegisterBankInfo.cpp:1213

const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const

Definition AMDGPURegisterBankInfo.cpp:3810

bool isScalarLoadLegal(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:442

unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const

Definition AMDGPURegisterBankInfo.cpp:1243

const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const

Definition AMDGPURegisterBankInfo.cpp:3799

bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:1041

void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const

Split 64-bit value Reg into two 32-bit halves and populate them into Regs.

Definition AMDGPURegisterBankInfo.cpp:657

const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const

Return the mapping for a pointer argument.

Definition AMDGPURegisterBankInfo.cpp:3729

unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3590

RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const

Definition AMDGPURegisterBankInfo.cpp:344

bool isDivergentRegBank(const RegisterBank *RB) const override

Returns true if the register bank is considered divergent.

Definition AMDGPURegisterBankInfo.cpp:227

void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const

Definition AMDGPURegisterBankInfo.cpp:1002

InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override

Get the alternative mappings for MI.

Definition AMDGPURegisterBankInfo.cpp:468

const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3624

const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3666

const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override

This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...

Definition AMDGPURegisterBankInfo.cpp:3835

unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override

Get the cost of using ValMapping to decompose a register.

Definition AMDGPURegisterBankInfo.cpp:262

const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const

Definition AMDGPURegisterBankInfo.cpp:3818

const GCNSubtarget & Subtarget

const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3642

bool isSALUMapping(const MachineInstr &MI) const

Definition AMDGPURegisterBankInfo.cpp:3608

Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const

Definition AMDGPURegisterBankInfo.cpp:701

bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const

Definition AMDGPURegisterBankInfo.cpp:1344

void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const

Definition AMDGPURegisterBankInfo.cpp:2124

const SIRegisterInfo * TRI

static const LaneMaskConstants & get(const GCNSubtarget &ST)

const unsigned XorTermOpc

const unsigned MovTermOpc

const unsigned AndSaveExecOpc

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ ICMP_SLT

signed less than

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

static constexpr ElementCount getFixed(ScalarTy MinVal)

Abstract class that contains various methods for clients to notify about changes.

constexpr unsigned getScalarSizeInBits() const

constexpr bool isScalar() const

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

constexpr uint16_t getNumElements() const

Returns the number of elements in a vector LLT.

constexpr bool isVector() const

constexpr TypeSize getSizeInBits() const

Returns the total size of the type. Must only be called on sized types.

constexpr LLT getElementType() const

Returns the vector's element type. Only valid for vector types.

constexpr unsigned getAddressSpace() const

static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)

Get a low-level fixed-width vector of some number of elements and element width.

constexpr LLT getScalarType() const

static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)

constexpr LLT divide(int Factor) const

Return a type that is Factor times smaller.

This is an important class for using LLVM in a threaded context.

LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)

Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...

LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)

LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)

Legalize an instruction by reducing the width of the underlying scalar type.

LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)

@ Legalized

Instruction has been legalized and the MachineFunction changed.

LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)

Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...

LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)

Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...

LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)

Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...

TypeSize getValue() const

LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)

Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...

LLVM_ABI iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())

Add Succ as a successor of this MachineBasicBlock.

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

MachineInstrBundleIterator< MachineInstr > iterator

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

BasicBlockListType::iterator iterator

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)

CreateMachineInstr - Allocate a new MachineInstr.

void insert(iterator MBBI, MachineBasicBlock *MBB)

Helper class to build MachineInstr.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...

MachineBasicBlock::iterator begin()

MachineBasicBlock::iterator end()

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

LocationSize getSize() const

Return the size in bytes of the memory reference.

unsigned getAddrSpace() const

bool isAtomic() const

Returns true if this operation has an atomic ordering requirement of unordered or higher,...

@ MODereferenceable

The memory access is dereferenceable (i.e., doesn't trap).

@ MOLoad

The memory access reads data.

@ MOInvariant

The memory access always returns the same value (or traps).

Flags getFlags() const

Return the raw flags of the source value,.

LLVM_ABI Align getAlign() const

Return the minimum known alignment in bytes of the actual memory reference.

MachineOperand class - Representation of each machine instruction operand.

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Helper class that represents how the value of an instruction may be mapped and what is the related co...

bool isValid() const

Check whether this object is valid.

Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...

const InstructionMapping & getInstrMapping() const

The final mapping of the instruction.

MachineInstr & getMI() const

MachineRegisterInfo & getMRI() const

The MachineRegisterInfo we used to realize the mapping.

iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const

Get all the virtual registers required to map the OpIdx-th operand of the instruction.

virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const

Get the alternative mappings for MI.

static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)

Constrain the (possibly generic) virtual register Reg to RC.

const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const

Method to get a uniquely generated InstructionMapping.

static void applyDefaultMapping(const OperandsMapper &OpdMapper)

Helper method to apply something that is like the default mapping.

const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const

The most common ValueMapping consists of a single PartialMapping.

const InstructionMapping & getInvalidInstructionMapping() const

Method to get a uniquely generated invalid InstructionMapping.

const RegisterBank & getRegBank(unsigned ID)

Get the register bank identified by ID.

const unsigned * Sizes

Hold the sizes of the register banks for all HwModes.

bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const

TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const

Get the size in bits of Reg.

const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const

Get the uniquely generated array of ValueMapping for the elements of between Begin and End.

SmallVector< const InstructionMapping *, 4 > InstructionMappings

Convenient type to represent the alternatives for mapping an instruction.

virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const

Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.

const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const

Try to get the mapping of MI.

This class implements the register bank concept.

unsigned getID() const

Get the identifier of this register bank.

Wrapper class representing virtual and physical registers.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

bool selectAGPRFormMFMA(unsigned NumRegs) const

Return true if an MFMA that requires at least NumRegs should select to the AGPR form,...

static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)

Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (, var-idx) should be expanded into a set of cmp...

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

static constexpr TypeSize getFixed(ScalarTy ExactSize)

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

self_iterator getIterator()

A range adaptor for a pair of iterators.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ PRIVATE_ADDRESS

Address space for private memory.

@ BUFFER_RESOURCE

Address space for 128-bit buffer resources.

bool isFlatGlobalAddrSpace(unsigned AS)

bool isUniformMMO(const MachineMemOperand *MMO)

bool isExtendedGlobalAddrSpace(unsigned AS)

Intrinsic::ID getIntrinsicID(const MachineInstr &I)

Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.

std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)

Returns base register and constant offset.

const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)

operand_type_match m_Reg()

SpecificConstantMatch m_ZeroInt()

Convenience matchers for specific integer values.

ConstantMatch< APInt > m_ICst(APInt &Cst)

BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)

bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)

SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)

Matches a RequestedValue constant or a constant splat of RequestedValue.

@ Kill

The last use of a register.

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)

See if Reg is defined by an single def instruction that is Opcode.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)

Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)

If VReg is defined by a G_CONSTANT fits in int64_t returns it.

static const MachineMemOperand::Flags MONoClobber

Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...

auto reverse(ContainerTy &&C)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

void call_once(once_flag &flag, Function &&F, Args &&... ArgList)

Execute the function specified as a parameter once.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)

If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...

Align assumeAligned(uint64_t Value)

Treats the value 0 as a 1, so Align is always at least 1.

unsigned Log2(Align A)

Returns the log2 of the alignment.

LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)

Find the source register for Reg, folding away any trivial copies.

constexpr T maskTrailingOnes(unsigned N)

Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.

@ Default

The result values are uniform if and only if all operands are uniform.

This struct is a compact representation of a valid (non-zero power of two) alignment.

constexpr uint64_t value() const

This is a hole in the type system and should not be abused.

This class contains a discriminated union of information about pointers in memory operands,...

unsigned StartIdx

Number of bits at which this partial mapping starts in the original value.

const RegisterBank * RegBank

Register bank where the partial value lives.

unsigned Length

Length of this mapping in bits.

Helper struct that represents how a value is mapped through different register banks.

unsigned NumBreakDowns

Number of partial mapping to break down this value.

const PartialMapping * BreakDown

How the value is broken down between the different register banks.

The llvm::once_flag structure.