LLVM: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Source File

1//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9/// \file

10/// This file implements the targeting of the InstructionSelector class for

11/// AMDGPU.

12/// \todo This should be generated by TableGen.

13//===----------------------------------------------------------------------===//

29#include "llvm/IR/IntrinsicsAMDGPU.h"

30#include <optional>

31

32#define DEBUG_TYPE "amdgpu-isel"

33

34using namespace llvm;

35using namespace MIPatternMatch;

36

37#define GET_GLOBALISEL_IMPL

38#define AMDGPUSubtarget GCNSubtarget

39#include "AMDGPUGenGlobalISel.inc"

40#undef GET_GLOBALISEL_IMPL

41#undef AMDGPUSubtarget

42

43AMDGPUInstructionSelector::AMDGPUInstructionSelector(

44 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,

45 const AMDGPUTargetMachine &TM)

46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),

47 STI(STI),

48#define GET_GLOBALISEL_PREDICATES_INIT

49#include "AMDGPUGenGlobalISel.inc"

50#undef GET_GLOBALISEL_PREDICATES_INIT

51#define GET_GLOBALISEL_TEMPORARIES_INIT

52#include "AMDGPUGenGlobalISel.inc"

53#undef GET_GLOBALISEL_TEMPORARIES_INIT

54{

55}

56

58

67}

68

69// Return the wave level SGPR base address if this is a wave address.

70static Register getWaveAddress(const MachineInstr *Def) {

71 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS

72 ? Def->getOperand(1).getReg()

73 : Register();

74}

75
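// Return true when the virtual register is expected to hold a wave lane mask,
// i.e. it maps to the VCC register bank (or a boolean-typed SGPR class).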

76bool AMDGPUInstructionSelector::isVCC(Register Reg,

77 const MachineRegisterInfo &MRI) const {

78

79 if (Reg.isPhysical())

80 return false;

81

82 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);

83 const TargetRegisterClass *RC =

84 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);

85 if (RC) {

86 const LLT Ty = MRI.getType(Reg);

88 return false;

89

90 return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

92 }

93

94 const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);

95 return RB->getID() == AMDGPU::VCCRegBankID;

96}

97

98bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,

99 unsigned NewOpc) const {

100 MI.setDesc(TII.get(NewOpc));

101 MI.removeOperand(1);

103

106

107

109 return false;

110

115 if (!DstRC || DstRC != SrcRC)

116 return false;

117

120}

121

122bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

125 I.setDesc(TII.get(TargetOpcode::COPY));

126

129 Register DstReg = Dst.getReg();

130 Register SrcReg = Src.getReg();

131

132 if (isVCC(DstReg, *MRI)) {

133 if (SrcReg == AMDGPU::SCC) {

136 if (!RC)

137 return true;

139 }

140

141 if (!isVCC(SrcReg, *MRI)) {

142

144 return false;

145

148

149 std::optional<ValueAndVReg> ConstVal =

151 if (ConstVal) {

152 unsigned MovOpc =

153 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

154 BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)

155 .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

156 } else {

157 Register MaskedReg = MRI->createVirtualRegister(SrcRC);

158

159

160

161

162

165 const int64_t NoMods = 0;

166 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

172 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

178 } else {

180 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

181 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)

184 if (IsSGPR)

185 And.setOperandDead(3);

186

187 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

190 }

191 }

192

193 if (MRI->getRegClassOrNull(SrcReg))

194 MRI->setRegClass(SrcReg, SrcRC);

195 I.eraseFromParent();

196 return true;

197 }

198

202 return false;

203

204 return true;

205 }

206

208 if (MO.getReg().isPhysical())

209 continue;

210

213 if (!RC)

214 continue;

216 }

217 return true;

218}

219
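// Lower a lane-mask (VCC bank) to SCC copy: compare the mask against zero with
// S_CMP_LG and copy the resulting SCC bit into the destination.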

220bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {

223

224 unsigned CmpOpc =

225 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

227 .addReg(I.getOperand(1).getReg())

230 return false;

231

232 Register DstReg = I.getOperand(0).getReg();

233 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(AMDGPU::SCC);

234

235 I.eraseFromParent();

237}

238
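// Lower an SCC to lane-mask copy: materialize constant inputs with S_MOV,
// otherwise copy the bit into SCC and expand it to a full mask with S_CSELECT.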

239bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {

242

243 Register DstReg = I.getOperand(0).getReg();

244 Register SrcReg = I.getOperand(1).getReg();

245 std::optional<ValueAndVReg> Arg =

247

248 if (Arg) {

249 const int64_t Value = Arg->Value.getZExtValue();

250 if (Value == 0) {

251 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

253 } else {

256 }

257 I.eraseFromParent();

259 }

260

261

262 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC).addReg(SrcReg);

263

264 unsigned SelectOpcode =

265 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;

269

270 I.eraseFromParent();

272}

273
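// Select a read-any-lane operation by emitting V_READFIRSTLANE_B32.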

274bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {

275 Register DstReg = I.getOperand(0).getReg();

276 Register SrcReg = I.getOperand(1).getReg();

277

280

281 auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)

283

284 I.eraseFromParent();

286}

287

288bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {

289 const Register DefReg = I.getOperand(0).getReg();

290 const LLT DefTy = MRI->getType(DefReg);

291

292

293

294

295

297 return false;

298

299

300

302 MRI->getRegClassOrRegBank(DefReg);

303

305 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);

306 if (!DefRC) {

308 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");

309 return false;

310 }

311

312 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);

314 if (!DefRC) {

315 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");

316 return false;

317 }

318 }

319

320

321

322 for (unsigned i = 1; i != I.getNumOperands(); i += 2) {

323 const Register SrcReg = I.getOperand(i).getReg();

324

326 if (RB) {

327 const LLT SrcTy = MRI->getType(SrcReg);

331 return false;

332 }

333 }

334

335 I.setDesc(TII.get(TargetOpcode::PHI));

337}

338

340AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,

342 unsigned SubIdx) const {

343

346 Register DstReg = MRI->createVirtualRegister(&SubRC);

347

348 if (MO.isReg()) {

349 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

351 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

352 .addReg(Reg, 0, ComposedSubIdx);

353

358 }

359

361

363

364 switch (SubIdx) {

365 default:

366 llvm_unreachable("do not know to split immediate with this sub index.");

367 case AMDGPU::sub0:

369 case AMDGPU::sub1:

371 }

372}

373

375 switch (Opc) {

376 case AMDGPU::G_AND:

377 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

378 case AMDGPU::G_OR:

379 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;

380 case AMDGPU::G_XOR:

381 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

382 default:

384 }

385}

386

387bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {

388 Register DstReg = I.getOperand(0).getReg();

390

392 if (DstRB->getID() != AMDGPU::SGPRRegBankID &&

393 DstRB->getID() != AMDGPU::VCCRegBankID)

394 return false;

395

396 bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&

399

400

402 true,

403 false,

404 true));

406}

407

408bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

411 Register DstReg = I.getOperand(0).getReg();

413 LLT Ty = MRI->getType(DstReg);

415 return false;

416

419 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;

420 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

421

422 if (Size == 32) {

423 if (IsSALU) {

424 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

426 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)

427 .add(I.getOperand(1))

428 .add(I.getOperand(2))

430 I.eraseFromParent();

432 }

433

435 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;

436 I.setDesc(TII.get(Opc));

440 }

441

442 const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

443

446 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)

448 .add(I.getOperand(1))

449 .add(I.getOperand(2))

451 I.eraseFromParent();

453 }

454

455 assert(!Sub && "illegal sub should not reach here");

456

458 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;

460 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

461

462 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));

463 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));

464 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));

465 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

466

467 Register DstLo = MRI->createVirtualRegister(&HalfRC);

468 Register DstHi = MRI->createVirtualRegister(&HalfRC);

469

470 if (IsSALU) {

471 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

472 .add(Lo1)

473 .add(Lo2);

474 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

475 .add(Hi1)

476 .add(Hi2)

478 } else {

480 Register CarryReg = MRI->createVirtualRegister(CarryRC);

481 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

483 .add(Lo1)

484 .add(Lo2)

488 .add(Hi1)

489 .add(Hi2)

492

494 return false;

495 }

496

497 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

499 .addImm(AMDGPU::sub0)

501 .addImm(AMDGPU::sub1);

502

503

505 return false;

506

507 I.eraseFromParent();

508 return true;

509}

510
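// Select overflow add/sub: when the carry-out is a lane mask use the VALU
// carry opcodes, otherwise use the scalar forms and route carries through SCC.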

511bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

516 Register Dst0Reg = I.getOperand(0).getReg();

517 Register Dst1Reg = I.getOperand(1).getReg();

518 const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||

519 I.getOpcode() == AMDGPU::G_UADDE;

520 const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||

521 I.getOpcode() == AMDGPU::G_USUBE;

522

523 if (isVCC(Dst1Reg, *MRI)) {

524 unsigned NoCarryOpc =

525 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

526 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;

527 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

531 }

532

533 Register Src0Reg = I.getOperand(2).getReg();

534 Register Src1Reg = I.getOperand(3).getReg();

535

536 if (HasCarryIn) {

537 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

538 .addReg(I.getOperand(4).getReg());

539 }

540

541 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;

542 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

543

544 auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)

545 .add(I.getOperand(2))

546 .add(I.getOperand(3));

547

548 if (MRI->use_nodbg_empty(Dst1Reg)) {

550 } else {

551 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

552 .addReg(AMDGPU::SCC);

553 if (MRI->getRegClassOrNull(Dst1Reg))

554 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

555 }

556

560 return false;

561

562 if (HasCarryIn &&

564 AMDGPU::SReg_32RegClass, *MRI))

565 return false;

566

567 I.eraseFromParent();

568 return true;

569}

570

571bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

575 const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

576

577 unsigned Opc;

579 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64

580 : AMDGPU::V_MAD_I64_I32_gfx11_e64;

581 else

582 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

583 I.setDesc(TII.get(Opc));

585 I.addImplicitDefUseOperands(*MF);

587}

588

589

590bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

592 Register DstReg = I.getOperand(0).getReg();

593 Register SrcReg = I.getOperand(1).getReg();

594 LLT DstTy = MRI->getType(DstReg);

595 LLT SrcTy = MRI->getType(SrcReg);

598

599

600 unsigned Offset = I.getOperand(2).getImm();

601 if (Offset % 32 != 0 || DstSize > 128)

602 return false;

603

604

605

606 if (DstSize == 16)

607 DstSize = 32;

608

612 return false;

613

617 if (!SrcRC)

618 return false;

620 DstSize / 32);

621 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

622 if (!SrcRC)

623 return false;

624

626 *SrcRC, I.getOperand(1));

628 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

630

631 I.eraseFromParent();

632 return true;

633}

634

635bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {

637 Register DstReg = MI.getOperand(0).getReg();

638 LLT DstTy = MRI->getType(DstReg);

639 LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

640

642 if (SrcSize < 32)

644

650 if (!DstRC)

651 return false;

652

655 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);

656 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {

660

664 return false;

665 }

666

668 return false;

669

670 MI.eraseFromParent();

671 return true;

672}

673

674bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {

676 const int NumDst = MI.getNumOperands() - 1;

677

679

680 Register SrcReg = Src.getReg();

681 Register DstReg0 = MI.getOperand(0).getReg();

682 LLT DstTy = MRI->getType(DstReg0);

683 LLT SrcTy = MRI->getType(SrcReg);

684

689

693 return false;

694

695

696

697

699 for (int I = 0, E = NumDst; I != E; ++I) {

701 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())

702 .addReg(SrcReg, 0, SubRegs[I]);

703

704

705 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);

707 return false;

708

712 return false;

713 }

714

715 MI.eraseFromParent();

716 return true;

717}

718

719bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {

720 assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||

721 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

722

723 Register Src0 = MI.getOperand(1).getReg();

724 Register Src1 = MI.getOperand(2).getReg();

725 LLT SrcTy = MRI->getType(Src0);

727

728

729 if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {

730 return selectG_MERGE_VALUES(MI);

731 }

732

733

734

735 Register Dst = MI.getOperand(0).getReg();

737 (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

740

742 if (DstBank->getID() == AMDGPU::AGPRRegBankID)

743 return false;

744

745 assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||

746 DstBank->getID() == AMDGPU::VGPRRegBankID);

747 const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

748

751

752

753

754

756 if (ConstSrc1) {

757 auto ConstSrc0 =

759 if (ConstSrc0) {

760 const int64_t K0 = ConstSrc0->Value.getSExtValue();

761 const int64_t K1 = ConstSrc1->Value.getSExtValue();

765

766

767 if (IsVector) {

768 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm);

769 MI.eraseFromParent();

771 }

772

773

774 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm);

775 MI.eraseFromParent();

777 }

778 }

779

780

782 return true;

783

784

785

787 if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {

788 MI.setDesc(TII.get(AMDGPU::COPY));

789 MI.removeOperand(2);

790 const auto &RC =

791 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

794 }

795

796

797 if (IsVector) {

798 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

799 auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

803 return false;

804

805 MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

810 return false;

811

812 MI.eraseFromParent();

813 return true;

814 }

815

818

819

820

821

822

823

824

825

826

827

828

829

830

833

836

837 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;

838 if (Shift0 && Shift1) {

839 Opc = AMDGPU::S_PACK_HH_B32_B16;

840 MI.getOperand(1).setReg(ShiftSrc0);

841 MI.getOperand(2).setReg(ShiftSrc1);

842 } else if (Shift1) {

843 Opc = AMDGPU::S_PACK_LH_B32_B16;

844 MI.getOperand(2).setReg(ShiftSrc1);

845 } else if (Shift0) {

846 auto ConstSrc1 =

848 if (ConstSrc1 && ConstSrc1->Value == 0) {

849

850 auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

854

855 MI.eraseFromParent();

857 }

859 Opc = AMDGPU::S_PACK_HL_B32_B16;

860 MI.getOperand(1).setReg(ShiftSrc0);

861 }

862 }

863

864 MI.setDesc(TII.get(Opc));

866}

867

868bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {

870

871

872

874 if ((!RC && MRI->getRegBankOrNull(MO.getReg())) ||

876 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

877 return true;

878 }

879

880 return false;

881}

882

883bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

885

886 Register DstReg = I.getOperand(0).getReg();

887 Register Src0Reg = I.getOperand(1).getReg();

888 Register Src1Reg = I.getOperand(2).getReg();

889 LLT Src1Ty = MRI->getType(Src1Reg);

890

891 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

893

894 int64_t Offset = I.getOperand(3).getImm();

895

896

897 if (Offset % 32 != 0 || InsSize % 32 != 0)

898 return false;

899

900

901 if (InsSize > 128)

902 return false;

903

905 if (SubReg == AMDGPU::NoSubRegister)

906 return false;

907

911 if (!DstRC)

912 return false;

913

920

921

922

923 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);

924 if (!Src0RC || !Src1RC)

925 return false;

926

930 return false;

931

933 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)

937

938 I.eraseFromParent();

939 return true;

940}

941

942bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

943 Register DstReg = MI.getOperand(0).getReg();

944 Register SrcReg = MI.getOperand(1).getReg();

945 Register OffsetReg = MI.getOperand(2).getReg();

946 Register WidthReg = MI.getOperand(3).getReg();

947

949 "scalar BFX instructions are expanded in regbankselect");

950 assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&

951 "64-bit vector BFX instructions are expanded in regbankselect");

952

955

956 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;

957 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

958 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg)

962 MI.eraseFromParent();

964}

965
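// Select llvm.amdgcn.interp.p1.f16 by copying the interpolation parameter to
// M0 and emitting V_INTERP_MOV_F32 followed by V_INTERP_P1LV_F16.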

966bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {

969

970 Register Dst = MI.getOperand(0).getReg();

971 Register Src0 = MI.getOperand(2).getReg();

972 Register M0Val = MI.getOperand(6).getReg();

976 return false;

977

978

979

980

981

982

983

984

985 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

988

989 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

991 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)

993 .addImm(MI.getOperand(4).getImm())

994 .addImm(MI.getOperand(3).getImm());

995

996 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst)

997 .addImm(0)

998 .addReg(Src0)

999 .addImm(MI.getOperand(4).getImm())

1000 .addImm(MI.getOperand(3).getImm())

1001 .addImm(0)

1002 .addReg(InterpMov)

1003 .addImm(MI.getOperand(5).getImm())

1004 .addImm(0)

1005 .addImm(0);

1006

1007 MI.eraseFromParent();

1008 return true;

1009}

1010

1011

1012

1013

1014

1015
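// Select llvm.amdgcn.writelane: fold constant lane selects and constant values
// into immediates where possible, otherwise route the lane select through M0.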

1016bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

1017

1020

1023 Register VDst = MI.getOperand(0).getReg();

1024 Register Val = MI.getOperand(2).getReg();

1025 Register LaneSelect = MI.getOperand(3).getReg();

1026 Register VDstIn = MI.getOperand(4).getReg();

1027

1028 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

1029

1030 std::optional<ValueAndVReg> ConstSelect =

1032 if (ConstSelect) {

1033

1034

1036 MIB.addImm(ConstSelect->Value.getSExtValue() &

1038 } else {

1039 std::optional<ValueAndVReg> ConstVal =

1041

1042

1043

1046 MIB.addImm(ConstVal->Value.getSExtValue());

1047 MIB.addReg(LaneSelect);

1048 } else {

1050

1051

1052

1053

1055

1056 BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1057 .addReg(LaneSelect);

1058 MIB.addReg(AMDGPU::M0);

1059 }

1060 }

1061

1063

1064 MI.eraseFromParent();

1066}

1067

1068

1069
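// Select llvm.amdgcn.div.scale: choose V_DIV_SCALE_F32/F64 from the result
// type and place the numerator or denominator in src0 based on the immediate.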

1070bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

1071 Register Dst0 = MI.getOperand(0).getReg();

1072 Register Dst1 = MI.getOperand(1).getReg();

1073

1074 LLT Ty = MRI->getType(Dst0);

1075 unsigned Opc;

1077 Opc = AMDGPU::V_DIV_SCALE_F32_e64;

1079 Opc = AMDGPU::V_DIV_SCALE_F64_e64;

1080 else

1081 return false;

1082

1083

1084

1087

1088 Register Numer = MI.getOperand(3).getReg();

1089 Register Denom = MI.getOperand(4).getReg();

1090 unsigned ChooseDenom = MI.getOperand(5).getImm();

1091

1092 Register Src0 = ChooseDenom != 0 ? Numer : Denom;

1093

1094 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)

1096 .addImm(0)

1097 .addUse(Src0)

1098 .addImm(0)

1099 .addUse(Denom)

1100 .addImm(0)

1101 .addUse(Numer)

1102 .addImm(0)

1103 .addImm(0);

1104

1105 MI.eraseFromParent();

1107}

1108

1109bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

1110 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();

1111 switch (IntrinsicID) {

1112 case Intrinsic::amdgcn_if_break: {

1114

1115

1116

1117 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))

1118 .add(I.getOperand(0))

1119 .add(I.getOperand(2))

1120 .add(I.getOperand(3));

1121

1122 Register DstReg = I.getOperand(0).getReg();

1123 Register Src0Reg = I.getOperand(2).getReg();

1124 Register Src1Reg = I.getOperand(3).getReg();

1125

1126 I.eraseFromParent();

1127

1128 for (Register Reg : { DstReg, Src0Reg, Src1Reg })

1130

1131 return true;

1132 }

1133 case Intrinsic::amdgcn_interp_p1_f16:

1134 return selectInterpP1F16(I);

1135 case Intrinsic::amdgcn_wqm:

1136 return constrainCopyLikeIntrin(I, AMDGPU::WQM);

1137 case Intrinsic::amdgcn_softwqm:

1138 return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);

1139 case Intrinsic::amdgcn_strict_wwm:

1140 case Intrinsic::amdgcn_wwm:

1141 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);

1142 case Intrinsic::amdgcn_strict_wqm:

1143 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);

1144 case Intrinsic::amdgcn_writelane:

1145 return selectWritelane(I);

1146 case Intrinsic::amdgcn_div_scale:

1147 return selectDivScale(I);

1148 case Intrinsic::amdgcn_icmp:

1149 case Intrinsic::amdgcn_fcmp:

1150 if (selectG_ICMP_or_FCMP(I))

1151 return true;

1152 return selectIntrinsicCmp(I);

1153 case Intrinsic::amdgcn_ballot:

1154 return selectBallot(I);

1155 case Intrinsic::amdgcn_reloc_constant:

1156 return selectRelocConstant(I);

1157 case Intrinsic::amdgcn_groupstaticsize:

1158 return selectGroupStaticSize(I);

1159 case Intrinsic::returnaddress:

1160 return selectReturnAddress(I);

1161 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

1162 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:

1163 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:

1164 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:

1165 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:

1166 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:

1167 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:

1168 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:

1169 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:

1170 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:

1171 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:

1172 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:

1173 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:

1174 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:

1175 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:

1176 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:

1177 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:

1178 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:

1179 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:

1180 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:

1181 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:

1182 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:

1183 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:

1184 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:

1185 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:

1186 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:

1187 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:

1188 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:

1189 return selectSMFMACIntrin(I);

1190 case Intrinsic::amdgcn_permlane16_swap:

1191 case Intrinsic::amdgcn_permlane32_swap:

1192 return selectPermlaneSwapIntrin(I, IntrinsicID);

1193 default:

1195 }

1196}

1197

1198static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,

1199 const GCNSubtarget &ST) {

1200 if (Size != 16 && Size != 32 && Size != 64)

1201 return -1;

1202

1203 if (Size == 16 && !ST.has16BitInsts())

1204 return -1;

1205

1206 const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,

1207 unsigned FakeS16Opc, unsigned S32Opc,

1208 unsigned S64Opc) {

1209 if (Size == 16)

1210

1211 return ST.hasTrue16BitInsts()

1212 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

1213 : S16Opc;

1214 if (Size == 32)

1215 return S32Opc;

1216 return S64Opc;

1217 };

1218

1219 switch (P) {

1220 default:

1223 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,

1224 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,

1225 AMDGPU::V_CMP_NE_U64_e64);

1227 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,

1228 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,

1229 AMDGPU::V_CMP_EQ_U64_e64);

1231 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,

1232 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,

1233 AMDGPU::V_CMP_GT_I64_e64);

1235 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,

1236 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,

1237 AMDGPU::V_CMP_GE_I64_e64);

1239 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,

1240 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,

1241 AMDGPU::V_CMP_LT_I64_e64);

1243 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,

1244 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,

1245 AMDGPU::V_CMP_LE_I64_e64);

1247 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,

1248 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,

1249 AMDGPU::V_CMP_GT_U64_e64);

1251 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,

1252 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,

1253 AMDGPU::V_CMP_GE_U64_e64);

1255 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,

1256 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,

1257 AMDGPU::V_CMP_LT_U64_e64);

1259 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,

1260 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,

1261 AMDGPU::V_CMP_LE_U64_e64);

1262

1264 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,

1265 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,

1266 AMDGPU::V_CMP_EQ_F64_e64);

1268 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,

1269 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,

1270 AMDGPU::V_CMP_GT_F64_e64);

1272 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,

1273 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,

1274 AMDGPU::V_CMP_GE_F64_e64);

1276 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,

1277 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,

1278 AMDGPU::V_CMP_LT_F64_e64);

1280 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,

1281 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,

1282 AMDGPU::V_CMP_LE_F64_e64);

1284 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,

1285 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,

1286 AMDGPU::V_CMP_NEQ_F64_e64);

1288 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,

1289 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,

1290 AMDGPU::V_CMP_O_F64_e64);

1292 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,

1293 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,

1294 AMDGPU::V_CMP_U_F64_e64);

1296 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,

1297 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,

1298 AMDGPU::V_CMP_NLG_F64_e64);

1300 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,

1301 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,

1302 AMDGPU::V_CMP_NLE_F64_e64);

1304 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,

1305 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,

1306 AMDGPU::V_CMP_NLT_F64_e64);

1308 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,

1309 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,

1310 AMDGPU::V_CMP_NGE_F64_e64);

1312 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,

1313 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,

1314 AMDGPU::V_CMP_NGT_F64_e64);

1316 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,

1317 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,

1318 AMDGPU::V_CMP_NEQ_F64_e64);

1320 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,

1321 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,

1322 AMDGPU::V_CMP_TRU_F64_e64);

1324 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,

1325 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,

1326 AMDGPU::V_CMP_F_F64_e64);

1327 }

1328}

1329

1330int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,

1331 unsigned Size) const {

1332 if (Size == 64) {

1334 return -1;

1335

1336 switch (P) {

1338 return AMDGPU::S_CMP_LG_U64;

1340 return AMDGPU::S_CMP_EQ_U64;

1341 default:

1342 return -1;

1343 }

1344 }

1345

1346 if (Size == 32) {

1347 switch (P) {

1349 return AMDGPU::S_CMP_LG_U32;

1351 return AMDGPU::S_CMP_EQ_U32;

1353 return AMDGPU::S_CMP_GT_I32;

1355 return AMDGPU::S_CMP_GE_I32;

1357 return AMDGPU::S_CMP_LT_I32;

1359 return AMDGPU::S_CMP_LE_I32;

1361 return AMDGPU::S_CMP_GT_U32;

1363 return AMDGPU::S_CMP_GE_U32;

1365 return AMDGPU::S_CMP_LT_U32;

1367 return AMDGPU::S_CMP_LE_U32;

1369 return AMDGPU::S_CMP_EQ_F32;

1371 return AMDGPU::S_CMP_GT_F32;

1373 return AMDGPU::S_CMP_GE_F32;

1375 return AMDGPU::S_CMP_LT_F32;

1377 return AMDGPU::S_CMP_LE_F32;

1379 return AMDGPU::S_CMP_LG_F32;

1381 return AMDGPU::S_CMP_O_F32;

1383 return AMDGPU::S_CMP_U_F32;

1385 return AMDGPU::S_CMP_NLG_F32;

1387 return AMDGPU::S_CMP_NLE_F32;

1389 return AMDGPU::S_CMP_NLT_F32;

1391 return AMDGPU::S_CMP_NGE_F32;

1393 return AMDGPU::S_CMP_NGT_F32;

1395 return AMDGPU::S_CMP_NEQ_F32;

1396 default:

1398 }

1399 }

1400

1401 if (Size == 16) {

1403 return -1;

1404

1405 switch (P) {

1407 return AMDGPU::S_CMP_EQ_F16;

1409 return AMDGPU::S_CMP_GT_F16;

1411 return AMDGPU::S_CMP_GE_F16;

1413 return AMDGPU::S_CMP_LT_F16;

1415 return AMDGPU::S_CMP_LE_F16;

1417 return AMDGPU::S_CMP_LG_F16;

1419 return AMDGPU::S_CMP_O_F16;

1421 return AMDGPU::S_CMP_U_F16;

1423 return AMDGPU::S_CMP_NLG_F16;

1425 return AMDGPU::S_CMP_NLE_F16;

1427 return AMDGPU::S_CMP_NLT_F16;

1429 return AMDGPU::S_CMP_NGE_F16;

1431 return AMDGPU::S_CMP_NGT_F16;

1433 return AMDGPU::S_CMP_NEQ_F16;

1434 default:

1436 }

1437 }

1438

1439 return -1;

1440}

1441

1442bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

1443

1446

1447 Register SrcReg = I.getOperand(2).getReg();

1449

1451

1452 Register CCReg = I.getOperand(0).getReg();

1453 if (!isVCC(CCReg, *MRI)) {

1454 int Opcode = getS_CMPOpcode(Pred, Size);

1455 if (Opcode == -1)

1456 return false;

1458 .add(I.getOperand(2))

1459 .add(I.getOperand(3));

1460 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

1461 .addReg(AMDGPU::SCC);

1462 bool Ret =

1465 I.eraseFromParent();

1466 return Ret;

1467 }

1468

1469 if (I.getOpcode() == AMDGPU::G_FCMP)

1470 return false;

1471

1473 if (Opcode == -1)

1474 return false;

1475

1477 I.getOperand(0).getReg())

1478 .add(I.getOperand(2))

1479 .add(I.getOperand(3));

1483 I.eraseFromParent();

1484 return Ret;

1485}

1486

1487bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {

1488 Register Dst = I.getOperand(0).getReg();

1489 if (isVCC(Dst, *MRI))

1490 return false;

1491

1492 LLT DstTy = MRI->getType(Dst);

1494 return false;

1495

1498 Register SrcReg = I.getOperand(2).getReg();

1500

1501

1502 if (Size == 1)

1503 return false;

1504

1507 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);

1508 I.eraseFromParent();

1510 }

1511

1513 if (Opcode == -1)

1514 return false;

1515

1519 auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());

1520 auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

1522 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

1524 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);

1525 SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

1527 SelectedMI.addImm(Src0Mods);

1528 SelectedMI.addReg(Src0Reg);

1530 SelectedMI.addImm(Src1Mods);

1531 SelectedMI.addReg(Src1Reg);

1533 SelectedMI.addImm(0);

1535 SelectedMI.addImm(0);

1536

1539 return false;

1540

1541 I.eraseFromParent();

1542 return true;

1543}

1544

1545

1546

1547

1548

1552 if (MI->getParent() != MBB)

1553 return false;

1554

1555

1556 if (MI->getOpcode() == AMDGPU::COPY) {

1557 auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());

1558 auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());

1559 if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&

1560 SrcRB->getID() == AMDGPU::SGPRRegBankID)

1561 return true;

1562 }

1563

1564

1565 if (isa<GAnyCmp>(MI))

1566 return true;

1567

1569

1573

1574 return false;

1575}

1576
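// Select llvm.amdgcn.ballot: a constant-false input becomes a zero mask and a
// constant-true input becomes exec; other inputs are copied or ANDed with
// exec, and a 64-bit ballot on wave32 is widened with a zero high half.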

1577bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

1580 Register DstReg = I.getOperand(0).getReg();

1581 Register SrcReg = I.getOperand(2).getReg();

1582 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();

1584

1585

1586

1587 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

1588 return false;

1589

1590 std::optional<ValueAndVReg> Arg =

1592

1594

1595 if (BallotSize != WaveSize) {

1596 Dst = MRI->createVirtualRegister(TRI.getBoolRC());

1597 }

1598

1599 if (Arg) {

1600 const int64_t Value = Arg->Value.getZExtValue();

1601 if (Value == 0) {

1602

1603 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

1605 } else {

1606

1609 }

1611 return false;

1612 } else {

1614

1615 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst).addReg(SrcReg);

1617 return false;

1618 } else {

1619

1620 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

1621 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), Dst)

1626 return false;

1627 }

1628 }

1629

1630

1631 if (BallotSize != WaveSize) {

1632 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

1633 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);

1634 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

1636 .addImm(AMDGPU::sub0)

1638 .addImm(AMDGPU::sub1);

1639 }

1640

1641 I.eraseFromParent();

1642 return true;

1643}

1644

1645bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {

1646 Register DstReg = I.getOperand(0).getReg();

1650 return false;

1651

1652 const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

1653

1656 auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();

1657 auto *RelocSymbol = cast<GlobalVariable>(

1658 M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));

1659

1661 BuildMI(*BB, &I, I.getDebugLoc(),

1662 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

1664

1665 I.eraseFromParent();

1666 return true;

1667}

1668

1669bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

1671

1672 Register DstReg = I.getOperand(0).getReg();

1674 unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?

1675 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

1676

1679

1680 auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);

1681

1685 } else {

1690 }

1691

1692 I.eraseFromParent();

1694}

1695

1696bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

1700

1702 Register DstReg = Dst.getReg();

1703 unsigned Depth = I.getOperand(2).getImm();

1704

1707 if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||

1709 return false;

1710

1711

1712 if (Depth != 0 ||

1714 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)

1716 I.eraseFromParent();

1717 return true;

1718 }

1719

1721

1723

1724

1727 AMDGPU::SReg_64RegClass, DL);

1728 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)

1730 I.eraseFromParent();

1731 return true;

1732}

1733

1734bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

1735

1736

1738 BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))

1739 .add(MI.getOperand(1));

1740

1742 MI.eraseFromParent();

1743

1744 if (MRI->getRegClassOrNull(Reg))

1746 return true;

1747}

1748

1749bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

1754

1755 unsigned IndexOperand = MI.getOperand(7).getImm();

1756 bool WaveRelease = MI.getOperand(8).getImm() != 0;

1757 bool WaveDone = MI.getOperand(9).getImm() != 0;

1758

1759 if (WaveDone && !WaveRelease)

1760 report_fatal_error("ds_ordered_count: wave_done requires wave_release");

1761

1762 unsigned OrderedCountIndex = IndexOperand & 0x3f;

1763 IndexOperand &= ~0x3f;

1764 unsigned CountDw = 0;

1765

1767 CountDw = (IndexOperand >> 24) & 0xf;

1768 IndexOperand &= ~(0xf << 24);

1769

1770 if (CountDw < 1 || CountDw > 4) {

1772 "ds_ordered_count: dword count must be between 1 and 4");

1773 }

1774 }

1775

1776 if (IndexOperand)

1778

1779 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

1781

1782 unsigned Offset0 = OrderedCountIndex << 2;

1783 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

1784

1786 Offset1 |= (CountDw - 1) << 6;

1787

1789 Offset1 |= ShaderType << 2;

1790

1791 unsigned Offset = Offset0 | (Offset1 << 8);

1792

1793 Register M0Val = MI.getOperand(2).getReg();

1794 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1796

1797 Register DstReg = MI.getOperand(0).getReg();

1798 Register ValReg = MI.getOperand(3).getReg();

1800 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

1804

1806 return false;

1807

1809 MI.eraseFromParent();

1810 return Ret;

1811}

1812

1814 switch (IntrID) {

1815 case Intrinsic::amdgcn_ds_gws_init:

1816 return AMDGPU::DS_GWS_INIT;

1817 case Intrinsic::amdgcn_ds_gws_barrier:

1818 return AMDGPU::DS_GWS_BARRIER;

1819 case Intrinsic::amdgcn_ds_gws_sema_v:

1820 return AMDGPU::DS_GWS_SEMA_V;

1821 case Intrinsic::amdgcn_ds_gws_sema_br:

1822 return AMDGPU::DS_GWS_SEMA_BR;

1823 case Intrinsic::amdgcn_ds_gws_sema_p:

1824 return AMDGPU::DS_GWS_SEMA_P;

1825 case Intrinsic::amdgcn_ds_gws_sema_release_all:

1826 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

1827 default:

1829 }

1830}

1831

1832bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

1834 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

1836 return false;

1837

1838

1839 const bool HasVSrc = MI.getNumOperands() == 3;

1840 assert(HasVSrc || MI.getNumOperands() == 2);

1841

1842 Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();

1844 if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

1845 return false;

1846

1848 unsigned ImmOffset;

1849

1852

1854

1855

1856

1857

1858 if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {

1859 Readfirstlane = OffsetDef;

1862 }

1863

1864 if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

1865

1866

1867

1868

1869

1871 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

1873 } else {

1874 std::tie(BaseOffset, ImmOffset) =

1876

1877 if (Readfirstlane) {

1878

1879

1881 return false;

1882

1885 } else {

1887 AMDGPU::SReg_32RegClass, *MRI))

1888 return false;

1889 }

1890

1891 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

1892 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)

1896

1897 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1899 }

1900

1901

1902

1903

1905

1906 if (HasVSrc) {

1907 Register VSrc = MI.getOperand(1).getReg();

1909

1911 return false;

1912 }

1913

1914 MIB.addImm(ImmOffset)

1916

1918

1919 MI.eraseFromParent();

1920 return true;

1921}

1922

1923bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,

1924 bool IsAppend) const {

1925 Register PtrBase = MI.getOperand(2).getReg();

1926 LLT PtrTy = MRI->getType(PtrBase);

1928

1930 std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

1931

1932

1933 if (!isDSOffsetLegal(PtrBase, Offset)) {

1934 PtrBase = MI.getOperand(2).getReg();

1936 }

1937

1940 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

1941

1942 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1945 return false;

1946

1947 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())

1949 .addImm(IsGDS ? -1 : 0)

1951 MI.eraseFromParent();

1953}

1954

1955bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {

1958

1961}

1962

1963bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {

1964 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();

1968

1969

1970 if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||

1971 IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {

1974 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));

1975 }

1976 MI.eraseFromParent();

1977 return true;

1978 }

1979 }

1980

1981 if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {

1982

1985 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))

1987 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))

1989 MI.eraseFromParent();

1990 return true;

1991 }

1992

1994}

1995

1996static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,

1997 bool &IsTexFail) {

1998 if (TexFailCtrl)

1999 IsTexFail = true;

2000

2001 TFE = (TexFailCtrl & 0x1) ? true : false;

2002 TexFailCtrl &= ~(uint64_t)0x1;

2003 LWE = (TexFailCtrl & 0x2) ? true : false;

2004 TexFailCtrl &= ~(uint64_t)0x2;

2005

2006 return TexFailCtrl == 0;

2007}

2008
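// Select an image intrinsic into a MIMG instruction: derive dmask and the
// number of VData/VAddr dwords from the base opcode, then pick the opcode for
// the subtarget encoding (NSA vs. default) and emit the operands in order.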

2009bool AMDGPUInstructionSelector::selectImageIntrinsic(

2013

2016

2018 unsigned IntrOpcode = Intr->BaseOpcode;

2022

2023 const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

2024

2026 LLT VDataTy;

2027 int NumVDataDwords = -1;

2028 bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||

2029 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

2030

2031 bool Unorm;

2032 if (!BaseOpcode->Sampler)

2033 Unorm = true;

2034 else

2035 Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

2036

2037 bool TFE;

2038 bool LWE;

2039 bool IsTexFail = false;

2040 if (parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),

2041 TFE, LWE, IsTexFail))

2042 return false;

2043

2044 const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();

2045 const bool IsA16 = (Flags & 1) != 0;

2046 const bool IsG16 = (Flags & 2) != 0;

2047

2048

2049 if (IsA16 && !STI.hasG16() && !IsG16)

2050 return false;

2051

2052 unsigned DMask = 0;

2053 unsigned DMaskLanes = 0;

2054

2055 if (BaseOpcode->Atomic) {

2056 VDataOut = MI.getOperand(0).getReg();

2057 VDataIn = MI.getOperand(2).getReg();

2058 LLT Ty = MRI->getType(VDataIn);

2059

2060

2061 const bool Is64Bit = BaseOpcode->AtomicX2 ?

2064

2066 assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

2067

2068 DMask = Is64Bit ? 0xf : 0x3;

2069 NumVDataDwords = Is64Bit ? 4 : 2;

2070 } else {

2071 DMask = Is64Bit ? 0x3 : 0x1;

2072 NumVDataDwords = Is64Bit ? 2 : 1;

2073 }

2074 } else {

2075 DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

2077

2078 if (BaseOpcode->Store) {

2079 VDataIn = MI.getOperand(1).getReg();

2080 VDataTy = MRI->getType(VDataIn);

2081 NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;

2082 } else if (BaseOpcode->NoReturn) {

2083 NumVDataDwords = 0;

2084 } else {

2085 VDataOut = MI.getOperand(0).getReg();

2086 VDataTy = MRI->getType(VDataOut);

2087 NumVDataDwords = DMaskLanes;

2088

2090 NumVDataDwords = (DMaskLanes + 1) / 2;

2091 }

2092 }

2093

2094

2095 if (Subtarget->hasG16() && IsG16) {

2098 assert(G16MappingInfo);

2099 IntrOpcode = G16MappingInfo->G16;

2100 }

2101

2102

2103 assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

2104

2105 unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();

2106 if (BaseOpcode->Atomic)

2110 return false;

2111

2112 int NumVAddrRegs = 0;

2113 int NumVAddrDwords = 0;

2114 for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {

2115

2117 if (!AddrOp.isReg())

2118 continue;

2119

2122 break;

2123

2124 ++NumVAddrRegs;

2125 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

2126 }

2127

2128

2129

2130

2131 const bool UseNSA =

2132 NumVAddrRegs != 1 &&

2134 : NumVAddrDwords == NumVAddrRegs);

2135 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

2136 LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");

2137 return false;

2138 }

2139

2140 if (IsTexFail)

2141 ++NumVDataDwords;

2142

2143 int Opcode = -1;

2144 if (IsGFX12Plus) {

2146 NumVDataDwords, NumVAddrDwords);

2147 } else if (IsGFX11Plus) {

2149 UseNSA ? AMDGPU::MIMGEncGfx11NSA

2150 : AMDGPU::MIMGEncGfx11Default,

2151 NumVDataDwords, NumVAddrDwords);

2152 } else if (IsGFX10Plus) {

2154 UseNSA ? AMDGPU::MIMGEncGfx10NSA

2155 : AMDGPU::MIMGEncGfx10Default,

2156 NumVDataDwords, NumVAddrDwords);

2157 } else {

2160 NumVDataDwords, NumVAddrDwords);

2161 if (Opcode == -1) {

2164 << "requested image instruction is not supported on this GPU\n");

2165 return false;

2166 }

2167 }

2168 if (Opcode == -1 &&

2171 NumVDataDwords, NumVAddrDwords);

2172 if (Opcode == -1)

2174 NumVDataDwords, NumVAddrDwords);

2175 }

2176 if (Opcode == -1)

2177 return false;

2178

2181

2182 if (VDataOut) {

2184 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

2185

2186 Register TmpReg = MRI->createVirtualRegister(

2187 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);

2188 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

2189

2191 if (MRI->use_empty(VDataOut)) {

2192 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)

2194 }

2195

2196 } else {

2197 MIB.addDef(VDataOut);

2198 }

2199 }

2200

2201 if (VDataIn)

2202 MIB.addReg(VDataIn);

2203

2204 for (int I = 0; I != NumVAddrRegs; ++I) {

2206 if (SrcOp.isReg()) {

2209 }

2210 }

2211

2212 MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());

2213 if (BaseOpcode->Sampler)

2214 MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

2215

2216 MIB.addImm(DMask);

2217

2218 if (IsGFX10Plus)

2222

2224 MIB.addImm(IsA16 &&

2225 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

2226 if (IsGFX10Plus)

2227 MIB.addImm(IsA16 ? -1 : 0);

2228

2230 MIB.addImm(TFE);

2231 } else if (TFE) {

2232 LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");

2233 return false;

2234 }

2235

2237 MIB.addImm(LWE);

2238 if (!IsGFX10Plus)

2239 MIB.addImm(DimInfo->DA ? -1 : 0);

2240 if (BaseOpcode->HasD16)

2241 MIB.addImm(IsD16 ? -1 : 0);

2242

2243 MI.eraseFromParent();

2246 return true;

2247}

2248

2249

2250

2251bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

2253 Register Dst0 = MI.getOperand(0).getReg();

2254 Register Dst1 = MI.getOperand(1).getReg();

2255

2258

2260 Register Data0 = MI.getOperand(4).getReg();

2261 Register Data1 = MI.getOperand(5).getReg();

2262 unsigned Offset = MI.getOperand(6).getImm();

2263

2264 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)

2271

2272 MI.eraseFromParent();

2274}

2275

2276bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

2278 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();

2279 switch (IntrinsicID) {

2280 case Intrinsic::amdgcn_end_cf:

2281 return selectEndCfIntrinsic(I);

2282 case Intrinsic::amdgcn_ds_ordered_add:

2283 case Intrinsic::amdgcn_ds_ordered_swap:

2284 return selectDSOrderedIntrinsic(I, IntrinsicID);

2285 case Intrinsic::amdgcn_ds_gws_init:

2286 case Intrinsic::amdgcn_ds_gws_barrier:

2287 case Intrinsic::amdgcn_ds_gws_sema_v:

2288 case Intrinsic::amdgcn_ds_gws_sema_br:

2289 case Intrinsic::amdgcn_ds_gws_sema_p:

2290 case Intrinsic::amdgcn_ds_gws_sema_release_all:

2291 return selectDSGWSIntrinsic(I, IntrinsicID);

2292 case Intrinsic::amdgcn_ds_append:

2293 return selectDSAppendConsume(I, true);

2294 case Intrinsic::amdgcn_ds_consume:

2295 return selectDSAppendConsume(I, false);

2296 case Intrinsic::amdgcn_init_whole_wave:

2297 return selectInitWholeWave(I);

2298 case Intrinsic::amdgcn_s_barrier:

2299 case Intrinsic::amdgcn_s_barrier_signal:

2300 case Intrinsic::amdgcn_s_barrier_wait:

2301 return selectSBarrier(I);

2302 case Intrinsic::amdgcn_raw_buffer_load_lds:

2303 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:

2304 case Intrinsic::amdgcn_struct_buffer_load_lds:

2305 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:

2306 return selectBufferLoadLds(I);

2307 case Intrinsic::amdgcn_global_load_lds:

2308 return selectGlobalLoadLds(I);

2309 case Intrinsic::amdgcn_exp_compr:

2311 Function &F = I.getMF()->getFunction();

2312 DiagnosticInfoUnsupported NoFpRet(

2313 F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);

2314 F.getContext().diagnose(NoFpRet);

2315 return false;

2316 }

2317 break;

2318 case Intrinsic::amdgcn_ds_bvh_stack_rtn:

2319 return selectDSBvhStackIntrinsic(I);

2320 case Intrinsic::amdgcn_s_barrier_init:

2321 case Intrinsic::amdgcn_s_barrier_signal_var:

2322 return selectNamedBarrierInit(I, IntrinsicID);

2323 case Intrinsic::amdgcn_s_barrier_join:

2324 case Intrinsic::amdgcn_s_get_named_barrier_state:

2325 return selectNamedBarrierInst(I, IntrinsicID);

2326 case Intrinsic::amdgcn_s_get_barrier_state:

2327 return selectSGetBarrierState(I, IntrinsicID);

2328 case Intrinsic::amdgcn_s_barrier_signal_isfirst:

2329 return selectSBarrierSignalIsfirst(I, IntrinsicID);

2330 }

2332}

2333

2334bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

2336 return true;

2337

2340

2341 Register DstReg = I.getOperand(0).getReg();

2346 if (!isVCC(CCReg, *MRI)) {

2347 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :

2348 AMDGPU::S_CSELECT_B32;

2351

2352

2353

2354

2355 if (MRI->getRegClassOrNull(CCReg))

2358 .add(I.getOperand(2))

2359 .add(I.getOperand(3));

2360

2361 bool Ret = false;

2364 I.eraseFromParent();

2365 return Ret;

2366 }

2367

2368

2369 if (Size > 32)

2370 return false;

2371

2373 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

2375 .add(I.getOperand(3))

2377 .add(I.getOperand(2))

2378 .add(I.getOperand(1));

2379

2381 I.eraseFromParent();

2382 return Ret;

2383}

2384

2385bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {

2386 Register DstReg = I.getOperand(0).getReg();

2387 Register SrcReg = I.getOperand(1).getReg();

2388 const LLT DstTy = MRI->getType(DstReg);

2389 const LLT SrcTy = MRI->getType(SrcReg);

2391

2394 if (DstTy == S1) {

2395

2396

2397 DstRB = SrcRB;

2398 } else {

2399 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

2400 if (SrcRB != DstRB)

2401 return false;

2402 }

2403

2404 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

2405

2408

2413 if (!SrcRC || !DstRC)

2414 return false;

2415

2418 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");

2419 return false;

2420 }

2421

2422 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {

2426 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)

2427 .addReg(SrcReg, 0, AMDGPU::lo16);

2428 I.eraseFromParent();

2429 return true;

2430 }

2431

2435

2436 Register LoReg = MRI->createVirtualRegister(DstRC);

2437 Register HiReg = MRI->createVirtualRegister(DstRC);

2438 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)

2439 .addReg(SrcReg, 0, AMDGPU::sub0);

2440 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)

2441 .addReg(SrcReg, 0, AMDGPU::sub1);

2442

2443 if (IsVALU && STI.hasSDWA()) {

2444

2445

2447 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

2448 .addImm(0)

2449 .addReg(HiReg)

2450 .addImm(0)

2456 } else {

2457 Register TmpReg0 = MRI->createVirtualRegister(DstRC);

2458 Register TmpReg1 = MRI->createVirtualRegister(DstRC);

2459 Register ImmReg = MRI->createVirtualRegister(DstRC);

2460 if (IsVALU) {

2461 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

2464 } else {

2465 BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)

2469 }

2470

2471 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;

2472 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;

2473 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

2474

2483

2484 if (!IsVALU) {

2485 And.setOperandDead(3);

2486 Or.setOperandDead(3);

2487 }

2488 }

2489

2490 I.eraseFromParent();

2491 return true;

2492 }

2493

2495 return false;

2496

2497 if (SrcSize > 32) {

2498 unsigned SubRegIdx =

2500 if (SubRegIdx == AMDGPU::NoSubRegister)

2501 return false;

2502

2503

2504

2506 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

2507 if (!SrcWithSubRC)

2508 return false;

2509

2510 if (SrcWithSubRC != SrcRC) {

2512 return false;

2513 }

2514

2515 I.getOperand(1).setSubReg(SubRegIdx);

2516 }

2517

2518 I.setDesc(TII.get(TargetOpcode::COPY));

2519 return true;

2520}

2521

2522

2523static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {

2524 Mask = maskTrailingOnes<unsigned>(Size);

2525 int SignedMask = static_cast<int>(Mask);

2526 return SignedMask >= -16 && SignedMask <= 64;

2527}

2528

2529

2530const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

2534 if (auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))

2535 return RB;

2536

2537

2538 if (auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))

2540 return nullptr;

2541}

2542

2543bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {

2544 bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;

2545 bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

2548 const Register DstReg = I.getOperand(0).getReg();

2549 const Register SrcReg = I.getOperand(1).getReg();

2550

2551 const LLT DstTy = MRI->getType(DstReg);

2552 const LLT SrcTy = MRI->getType(SrcReg);

2553 const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

2557 return false;

2558

2559

2560 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

2561

2562

2563 if (I.getOpcode() == AMDGPU::G_ANYEXT) {

2564 if (DstSize <= 32)

2565 return selectCOPY(I);

2566

2568 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);

2571 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

2572

2573 Register UndefReg = MRI->createVirtualRegister(SrcRC);

2574 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

2575 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

2577 .addImm(AMDGPU::sub0)

2579 .addImm(AMDGPU::sub1);

2580 I.eraseFromParent();

2581

2584 }

2585

2586 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

2587

2588

2589

2590 unsigned Mask;

2593 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)

2596 I.eraseFromParent();

2598 }

2599

2600 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

2604 .addImm(0)

2605 .addImm(SrcSize);

2606 I.eraseFromParent();

2608 }

2609

2610 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {

2612 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;

2614 return false;

2615

2616 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {

2617 const unsigned SextOpc = SrcSize == 8 ?

2618 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

2621 I.eraseFromParent();

2623 }

2624

2625

2626

2627 if (DstSize > 32 && SrcSize == 32) {

2628 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2629 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

2631 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)

2635 } else {

2636 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)

2638 }

2639 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

2641 .addImm(AMDGPU::sub0)

2643 .addImm(AMDGPU::sub1);

2644 I.eraseFromParent();

2646 *MRI);

2647 }

2648

2649 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;

2650 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

2651

2652

2653 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

2654

2655 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

2656 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2657 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

2658

2659 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

2660 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)

2662 .addImm(AMDGPU::sub0)

2664 .addImm(AMDGPU::sub1);

2665

2668 .addImm(SrcSize << 16);

2669

2670 I.eraseFromParent();

2672 }

2673

2674 unsigned Mask;

2676 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)

2680 } else {

2683 .addImm(SrcSize << 16);

2684 }

2685

2686 I.eraseFromParent();

2688 }

2689

2690 return false;

2691}

2692

2695}

2696

2700 Reg = BitcastSrc;

2701 return Reg;

2702}

2703

2708 return false;

2709

2716 return true;

2717 }

2718 }

2719

2721 if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

2722 return false;

2723

2726

2728 assert(Mask.size() == 2);

2729

2730 if (Mask[0] == 1 && Mask[1] <= 1) {

2732 return true;

2733 }

2734

2735 return false;

2736}

2737

2738bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
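  // Only the scalar (SGPR) f16->f32 case where the source is the high half of
  // a packed 32-bit value is handled here, selecting S_CVT_HI_F32_F16;
  // anything else returns false so other selection paths can handle it.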

2740 return false;

2741

2742 Register Dst = I.getOperand(0).getReg();

2744 if (DstRB->getID() != AMDGPU::SGPRRegBankID)

2745 return false;

2746

2747 Register Src = I.getOperand(1).getReg();

2748

2753 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

2755 I.eraseFromParent();

2757 }

2758 }

2759

2760 return false;

2761}

2762

2763bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
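  // Scalar 64-bit fneg (optionally combined with fabs): copy out the low and
  // high halves, flip (S_XOR_B32) or set (S_OR_B32, for fneg(fabs)) the sign
  // bit of the high half against 0x80000000, then reassemble the result with
  // REG_SEQUENCE.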

2764

2765

2766

2767

2768

2769

2770

2771

2772

2773

2774

2775 Register Dst = MI.getOperand(0).getReg();

2777 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

2779 return false;

2780

2781 Register Src = MI.getOperand(1).getReg();

2783 if (Fabs)

2785

2788 return false;

2789

2792 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2793 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2794 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2795 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2796

2797 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)

2798 .addReg(Src, 0, AMDGPU::sub0);

2799 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)

2800 .addReg(Src, 0, AMDGPU::sub1);

2801 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

2802 .addImm(0x80000000);

2803

2804

2805 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

2806 BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)

2810 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

2812 .addImm(AMDGPU::sub0)

2814 .addImm(AMDGPU::sub1);

2815 MI.eraseFromParent();

2816 return true;

2817}

2818

2819

2820bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
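  // Scalar 64-bit fabs: clear the sign bit of the high half with S_AND_B32
  // against 0x7fffffff and rebuild the 64-bit result with REG_SEQUENCE.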

2821 Register Dst = MI.getOperand(0).getReg();

2823 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

2825 return false;

2826

2827 Register Src = MI.getOperand(1).getReg();

2830 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2831 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2832 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2833 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2834

2837 return false;

2838

2839 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)

2840 .addReg(Src, 0, AMDGPU::sub0);

2841 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)

2842 .addReg(Src, 0, AMDGPU::sub1);

2843 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

2844 .addImm(0x7fffffff);

2845

2846

2847

2848 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

2852 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

2854 .addImm(AMDGPU::sub0)

2856 .addImm(AMDGPU::sub1);

2857

2858 MI.eraseFromParent();

2859 return true;

2860}

2861

static bool isConstant(const MachineInstr &MI) {
2863 return MI.getOpcode() == TargetOpcode::G_CONSTANT;

2864}

2865

2866void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

2868

2869 unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;

2871 MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

2872

2874

2875 if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

2876 return;

2877

2878 GEPInfo GEPInfo;

2879

2880 for (unsigned i = 1; i != 3; ++i) {

2885

2886

2887 assert(GEPInfo.Imm == 0);

2889 continue;

2890 }

2892 if (OpBank->getID() == AMDGPU::SGPRRegBankID)

2893 GEPInfo.SgprParts.push_back(GEPOp.getReg());

2894 else

2895 GEPInfo.VgprParts.push_back(GEPOp.getReg());

2896 }

2897

2899 getAddrModeInfo(*PtrMI, MRI, AddrInfo);

2900}

2901

2902bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {

2903 return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

2904}

2905

2906bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
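  // A memory instruction is uniform when the pointer of its single memory
  // operand is a kernel argument, constant, global, or undef value, or carries
  // !amdgpu.uniform metadata; G_PREFETCH is uniform when its address operand
  // lives on the SGPR bank.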

2907 if (!MI.hasOneMemOperand())
2908 return false;

2909

2912

2913

2914

2915

2916

2917 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
2918 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))

2919 return true;

2920

2922 return true;

2923

2924 if (MI.getOpcode() == AMDGPU::G_PREFETCH)

2925 return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==

2926 AMDGPU::SGPRRegBankID;

2927

 const Instruction *I = dyn_cast<Instruction>(Ptr);
2929 return I && I->getMetadata("amdgpu.uniform");

2930}

2931

2932bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {

2933 for (const GEPInfo &GEPInfo : AddrInfo) {

2934 if (!GEPInfo.VgprParts.empty())

2935 return true;

2936 }

2937 return false;

2938}

2939

2940void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {

2941 const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

2946

2947

2948 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

2950 }

2951}

2952

2953bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

2955 initM0(I);

2957}

2958

2960 if (Reg.isPhysical())

2961 return false;

2962

2964 const unsigned Opcode = MI.getOpcode();

2965

2966 if (Opcode == AMDGPU::COPY)

2968

2969 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||

2970 Opcode == AMDGPU::G_XOR)

2973

2974 if (auto *GI = dyn_cast<GIntrinsic>(&MI))

2975 return GI->is(Intrinsic::amdgcn_class);

2976

2977 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;

2978}

2979

2980bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
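  // Conditions on the scalar (SCC) path are copied into SCC and use
  // S_CBRANCH_SCC1; VCC-bank conditions are masked with EXEC via
  // S_AND_B32/S_AND_B64 when needed and branch with S_CBRANCH_VCCNZ.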

2985

2986 unsigned BrOpcode;

2989

2990

2991

2992

2993

2994

2995 if (!isVCC(CondReg, *MRI)) {

2997 return false;

2998

2999 CondPhysReg = AMDGPU::SCC;

3000 BrOpcode = AMDGPU::S_CBRANCH_SCC1;

3001 ConstrainRC = &AMDGPU::SReg_32RegClass;

3002 } else {

3003

3004

3005

3006

3008 const bool Is64 = STI.isWave64();

3009 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

3010 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

3011

3012 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());

3013 BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

3017 CondReg = TmpReg;

3018 }

3019

3020 CondPhysReg = TRI.getVCC();

3021 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;

3022 ConstrainRC = TRI.getBoolRC();

3023 }

3024

3025 if (MRI->getRegClassOrNull(CondReg))

3026 MRI->setRegClass(CondReg, ConstrainRC);

3027

3028 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)

3030 BuildMI(*BB, &I, DL, TII.get(BrOpcode))

3031 .addMBB(I.getOperand(1).getMBB());

3032

3033 I.eraseFromParent();

3034 return true;

3035}

3036

3037bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(

3039 Register DstReg = I.getOperand(0).getReg();

3041 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

3042 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

3043 if (IsVGPR)

3045

3047 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);

3048}

3049

3050bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
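  // If a 32-bit half of the mask is known to be all ones, that half of the
  // pointer is copied unchanged; otherwise the pointer is split into
  // sub0/sub1, each half ANDed (S_AND_B32 / V_AND_B32_e64) with the matching
  // mask half, and the result rebuilt with REG_SEQUENCE. A fully general
  // 64-bit scalar mask uses a single S_AND_B64.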

3051 Register DstReg = I.getOperand(0).getReg();

3052 Register SrcReg = I.getOperand(1).getReg();

3053 Register MaskReg = I.getOperand(2).getReg();

3054 LLT Ty = MRI->getType(DstReg);

3055 LLT MaskTy = MRI->getType(MaskReg);

3058

3062 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

3063 if (DstRB != SrcRB)

3064 return false;

3065

3066

3067

3071

3072 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;

3073 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

3074

3076 !CanCopyLow32 && !CanCopyHi32) {

3077 auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)

3081 I.eraseFromParent();

3083 }

3084

3085 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;

3087 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

3088

3092 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

3093

3097 return false;

3098

3101 "ptrmask should have been narrowed during legalize");

3102

3103 auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)

3106

3107 if (!IsVGPR)

3109 I.eraseFromParent();

3110 return true;

3111 }

3112

3113 Register HiReg = MRI->createVirtualRegister(&RegRC);

3114 Register LoReg = MRI->createVirtualRegister(&RegRC);

3115

3116

3117 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)

3118 .addReg(SrcReg, 0, AMDGPU::sub0);

3119 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)

3120 .addReg(SrcReg, 0, AMDGPU::sub1);

3121

3122 Register MaskedLo, MaskedHi;

3123

3124 if (CanCopyLow32) {

3125

3126 MaskedLo = LoReg;

3127 } else {

3128

3129 Register MaskLo = MRI->createVirtualRegister(&RegRC);

3130 MaskedLo = MRI->createVirtualRegister(&RegRC);

3131

3132 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)

3133 .addReg(MaskReg, 0, AMDGPU::sub0);

3134 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

3137 }

3138

3139 if (CanCopyHi32) {

3140

3141 MaskedHi = HiReg;

3142 } else {

3143 Register MaskHi = MRI->createVirtualRegister(&RegRC);

3144 MaskedHi = MRI->createVirtualRegister(&RegRC);

3145

3146 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)

3147 .addReg(MaskReg, 0, AMDGPU::sub1);

3148 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

3151 }

3152

3153 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

3155 .addImm(AMDGPU::sub0)

3157 .addImm(AMDGPU::sub1);

3158 I.eraseFromParent();

3159 return true;

3160}

3161

3162

3163

3164static std::pair<Register, unsigned>

3170

3171 std::tie(IdxBaseReg, Offset) =

3173 if (IdxBaseReg == AMDGPU::NoRegister) {

3174

3175

3177 IdxBaseReg = IdxReg;

3178 }

3179

3181

3182

3183

3184 if (static_cast<unsigned>(Offset) >= SubRegs.size())

3185 return std::pair(IdxReg, SubRegs[0]);

3186 return std::pair(IdxBaseReg, SubRegs[Offset]);

3187}

3188
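  // G_EXTRACT_VECTOR_ELT with a uniform (SGPR) index. SGPR vectors copy the
  // index into M0 and use S_MOVRELS_B32/B64; 32-bit elements of VGPR vectors
  // use V_MOVRELS_B32_e32 with M0, or the GPR-indexing pseudo built from the
  // descriptor below.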

3189bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(

3191 Register DstReg = MI.getOperand(0).getReg();

3192 Register SrcReg = MI.getOperand(1).getReg();

3193 Register IdxReg = MI.getOperand(2).getReg();

3194

3195 LLT DstTy = MRI->getType(DstReg);

3196 LLT SrcTy = MRI->getType(SrcReg);

3197

3201

3202

3203

3204 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

3205 return false;

3206

3208 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);

3210 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);

3211 if (!SrcRC || !DstRC)

3212 return false;

3216 return false;

3217

3221

3224 *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB);

3225

3226 if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {

3228 return false;

3229

3230 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3232

3233 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;

3234 BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)

3237 MI.eraseFromParent();

3238 return true;

3239 }

3240

3241 if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)

3242 return false;

3243

3245 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3247 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)

3250 MI.eraseFromParent();

3251 return true;

3252 }

3253

3256 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)

3260

3261 MI.eraseFromParent();

3262 return true;

3263}

3264

3265

3266bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(

3268 Register DstReg = MI.getOperand(0).getReg();

3269 Register VecReg = MI.getOperand(1).getReg();

3270 Register ValReg = MI.getOperand(2).getReg();

3271 Register IdxReg = MI.getOperand(3).getReg();

3272

3273 LLT VecTy = MRI->getType(DstReg);

3274 LLT ValTy = MRI->getType(ValReg);

3277

3281

3283

3284

3285

3286 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

3287 return false;

3288

3290 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);

3292 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

3293

3298 return false;

3299

3300 if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

3301 return false;

3302

3304 std::tie(IdxReg, SubReg) =

3306

3307 const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&

3309

3312

3313 if (!IndexMode) {

3314 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3316

3318 VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);

3319 BuildMI(*BB, MI, DL, RegWriteOp, DstReg)

3323 MI.eraseFromParent();

3324 return true;

3325 }

3326

3329 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)

3334

3335 MI.eraseFromParent();

3336 return true;

3337}

3338

3339bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
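  // Buffer-to-LDS load: the BUFFER_LOAD_*_LDS_{BOTHEN,IDXEN,OFFEN,OFFSET}
  // opcode is chosen from the load size and from whether vindex/voffset
  // operands are present; the LDS base operand is copied into M0, and when
  // both vindex and voffset exist they are combined into a 64-bit vaddr with
  // REG_SEQUENCE.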

3341 unsigned Opc;

3342 unsigned Size = MI.getOperand(3).getImm();

3343

3344

3345 const bool HasVIndex = MI.getNumOperands() == 9;

3347 int OpOffset = 0;

3348 if (HasVIndex) {

3349 VIndex = MI.getOperand(4).getReg();

3350 OpOffset = 1;

3351 }

3352

3353 Register VOffset = MI.getOperand(4 + OpOffset).getReg();

3354 std::optional<ValueAndVReg> MaybeVOffset =

3356 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

3357

3358 switch (Size) {

3359 default:

3360 return false;

3361 case 1:

3362 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN

3363 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN

3364 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN

3365 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;

3366 break;

3367 case 2:

3368 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN

3369 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN

3370 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN

3371 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;

3372 break;

3373 case 4:

3374 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN

3375 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN

3376 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN

3377 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;

3378 break;

3379 case 12:

3381 return false;

3382

3383 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN

3384 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN

3385 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN

3386 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;

3387 break;

3388 case 16:

3390 return false;

3391

3392 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN

3393 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN

3394 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN

3395 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;

3396 break;

3397 }

3398

3401 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3402 .add(MI.getOperand(2));

3403

3405

3406 if (HasVIndex && HasVOffset) {

3407 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());

3408 BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)

3410 .addImm(AMDGPU::sub0)

3412 .addImm(AMDGPU::sub1);

3413

3415 } else if (HasVIndex) {

3417 } else if (HasVOffset) {

3418 MIB.addReg(VOffset);

3419 }

3420

3421 MIB.add(MI.getOperand(1));

3422 MIB.add(MI.getOperand(5 + OpOffset));

3423 MIB.add(MI.getOperand(6 + OpOffset));

3425 unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

3430 ? 1

3431 : 0);

3432

3435 LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();

3437 StorePtrI.V = nullptr;

3439

3444

3448

3449 MIB.setMemRefs({LoadMMO, StoreMMO});

3450

3451 MI.eraseFromParent();

3453}

3454

3455

3460

3461

3463 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

3465

3466 assert(Def->getNumOperands() == 3 &&

3467 MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));

3469 return Def->getOperand(1).getReg();

3470 }

3471

3473}

3474

3475bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {

3476 unsigned Opc;

3477 unsigned Size = MI.getOperand(3).getImm();

3478

3479 switch (Size) {

3480 default:

3481 return false;

3482 case 1:

3483 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;

3484 break;

3485 case 2:

3486 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;

3487 break;

3488 case 4:

3489 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;

3490 break;

3491 case 12:

3493 return false;

3494 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;

3495 break;

3496 case 16:

3498 return false;

3499 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;

3500 break;

3501 }

3502

3505 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3506 .add(MI.getOperand(2));

3507

3510

3511

3512 if (!isSGPR(Addr)) {

3514 if (isSGPR(AddrDef->Reg)) {

3515 Addr = AddrDef->Reg;

3516 } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

3519 if (isSGPR(SAddr)) {

3520 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

3522 Addr = SAddr;

3523 VOffset = Off;

3524 }

3525 }

3526 }

3527 }

3528

3529 if (isSGPR(Addr)) {

3531 if (!VOffset) {

3532 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

3533 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

3535 }

3536 }

3537

3540

3541 if (isSGPR(Addr))

3542 MIB.addReg(VOffset);

3543

3544 MIB.add(MI.getOperand(4))

3545 .add(MI.getOperand(5));

3546

3549 LoadPtrI.Offset = MI.getOperand(4).getImm();

3559 sizeof(int32_t), Align(4));

3560

3561 MIB.setMemRefs({LoadMMO, StoreMMO});

3562

3563 MI.eraseFromParent();

3565}

3566

3567bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {

3568 MI.setDesc(TII.get(MI.getOperand(1).getImm()));

3569 MI.removeOperand(1);

3570 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

3571 return true;

3572}

3573

3574

3575

3576bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {

3577 unsigned Opc;

3579 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

3580 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;

3581 break;

3582 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:

3583 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;

3584 break;

3585 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:

3586 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;

3587 break;

3588 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:

3589 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;

3590 break;

3591 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:

3592 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;

3593 break;

3594 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:

3595 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;

3596 break;

3597 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:

3598 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;

3599 break;

3600 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:

3601 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;

3602 break;

3603 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:

3604 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;

3605 break;

3606 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:

3607 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;

3608 break;

3609 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:

3610 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;

3611 break;

3612 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:

3613 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;

3614 break;

3615 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:

3616 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;

3617 break;

3618 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:

3619 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;

3620 break;

3621 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:

3622 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;

3623 break;

3624 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:

3625 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;

3626 break;

3627 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:

3628 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;

3629 break;

3630 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:

3631 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;

3632 break;

3633 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:

3634 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;

3635 break;

3636 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:

3637 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;

3638 break;

3639 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:

3640 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;

3641 break;

3642 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:

3643 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;

3644 break;

3645 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:

3646 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;

3647 break;

3648 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:

3649 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;

3650 break;

3651 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:

3652 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;

3653 break;

3654 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:

3655 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;

3656 break;

3657 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:

3658 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;

3659 break;

3660 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:

3661 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;

3662 break;

3663 default:

3665 }

3666

3667 auto VDst_In = MI.getOperand(4);

3668

3669 MI.setDesc(TII.get(Opc));

3670 MI.removeOperand(4);

3671 MI.removeOperand(1);

3672 MI.addOperand(VDst_In);

3673 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

3674 return true;

3675}

3676

3677bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(

3679 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&

3681 return false;

3682 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&

3684 return false;

3685

3686 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap

3687 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64

3688 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

3689

3690 MI.removeOperand(2);

3691 MI.setDesc(TII.get(Opcode));

3693

3696

3698}

3699

3700bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {

3701 Register DstReg = MI.getOperand(0).getReg();

3702 Register SrcReg = MI.getOperand(1).getReg();

3704 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

3707

3708 if (IsVALU) {

3709 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)

3712 } else {

3713 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)

3717 }

3718

3720 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

3722 return false;

3723

3724 MI.eraseFromParent();

3725 return true;

3726}

3727

3728

3729

3733 unsigned NumOpcodes = 0;

3734 uint8_t LHSBits, RHSBits;

3735

3737

3738

3739

3740

3741

3742

3743

3744

3745

3746 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

3747

3749 Bits = 0xff;

3750 return true;

3751 }

3753 Bits = 0;

3754 return true;

3755 }

3756

3757 for (unsigned I = 0; I < Src.size(); ++I) {

3758

3759 if (Src[I] == Op) {

3760 Bits = SrcBits[I];

3761 return true;

3762 }

3763

3764 if (Src[I] == R) {

3765 Bits = SrcBits[I];

3766 Src[I] = Op;

3767 return true;

3768 }

3769 }

3770

3771 if (Src.size() == 3) {

3772

3773

3774

3778 for (unsigned I = 0; I < Src.size(); ++I) {

3779 if (Src[I] == LHS) {

3780 Bits = ~SrcBits[I];

3781 return true;

3782 }

3783 }

3784 }

3785

3786 return false;

3787 }

3788

3789 Bits = SrcBits[Src.size()];

3790 Src.push_back(Op);

3791 return true;

3792 };

3793

3795 switch (MI->getOpcode()) {

3796 case TargetOpcode::G_AND:

3797 case TargetOpcode::G_OR:

3798 case TargetOpcode::G_XOR: {

3801

3803 if (!getOperandBits(LHS, LHSBits) ||

3804 !getOperandBits(RHS, RHSBits)) {

3805 Src = Backup;

3806 return std::make_pair(0, 0);

3807 }

3808

3809

3811 if (Op.first) {

3812 NumOpcodes += Op.first;

3813 LHSBits = Op.second;

3814 }

3815

3817 if (Op.first) {

3818 NumOpcodes += Op.first;

3819 RHSBits = Op.second;

3820 }

3821 break;

3822 }

3823 default:

3824 return std::make_pair(0, 0);

3825 }

3826

3828 switch (MI->getOpcode()) {

3829 case TargetOpcode::G_AND:

3830 TTbl = LHSBits & RHSBits;

3831 break;

3832 case TargetOpcode::G_OR:

3833 TTbl = LHSBits | RHSBits;

3834 break;

3835 case TargetOpcode::G_XOR:

3836 TTbl = LHSBits ^ RHSBits;

3837 break;

3838 default:

3839 break;

3840 }

3841

3842 return std::make_pair(NumOpcodes + 1, TTbl);

3843}

3844

3845bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
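  // Fold a tree of G_AND/G_OR/G_XOR into V_BITOP3_B32/B16. The helper above
  // assigns each distinct source one of the masks 0xf0/0xcc/0xaa and combines
  // them with the corresponding bitwise operator to form the 8-bit truth
  // table; the fold is only taken when it covers enough operations and no
  // more than three distinct sources.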

3847 return false;

3848

3849 Register DstReg = MI.getOperand(0).getReg();

3851 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

3852 if (!IsVALU)

3853 return false;

3854

3857 unsigned NumOpcodes;

3858

3859 std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

3860

3861

3862

3863 if (NumOpcodes < 2 || Src.empty())

3864 return false;

3865

3866 const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);

3867 if (NumOpcodes == 2 && IsB32) {

3868

3869

3870

3874 return false;

3875 } else if (NumOpcodes < 4) {

3876

3877

3878

3879 return false;

3880 }

3881

3882 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;

3886

3887 for (unsigned I = 0; I < Src.size(); ++I) {

3889 if (RB->getID() != AMDGPU::SGPRRegBankID)

3890 continue;

3891 if (CBL > 0) {

3892 --CBL;

3893 continue;

3894 }

3895 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

3896 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), NewReg)

3898 Src[I] = NewReg;

3899 }

3900

3901

3902

3903

3904

3905

3906 while (Src.size() < 3)

3907 Src.push_back(Src[0]);

3908

3909 auto MIB = BuildMI(*MBB, MI, DL, TII.get(Opc), DstReg);

3910 if (!IsB32)

3911 MIB.addImm(0);

3913 if (!IsB32)

3914 MIB.addImm(0);

3916 if (!IsB32)

3917 MIB.addImm(0);

3920 if (!IsB32)

3921 MIB.addImm(0);

3922

3924 MI.eraseFromParent();

3925

3926 return true;

3927}

3928

3929bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {

3930 Register SrcReg = MI.getOperand(0).getReg();

3932 return false;

3933

3940

3941 if (!WaveAddr) {

3942 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

3943 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)

3947 }

3948

3951

3952 MI.eraseFromParent();

3953 return true;

3954}

3955

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
3957

3958 if (!I.isPreISelOpcode()) {

3959 if (I.isCopy())

3960 return selectCOPY(I);

3961 return true;

3962 }

3963

3964 switch (I.getOpcode()) {

3965 case TargetOpcode::G_AND:

3966 case TargetOpcode::G_OR:

3967 case TargetOpcode::G_XOR:

3968 if (selectBITOP3(I))

3969 return true;

3971 return true;

3972 return selectG_AND_OR_XOR(I);

3973 case TargetOpcode::G_ADD:

3974 case TargetOpcode::G_SUB:

3975 case TargetOpcode::G_PTR_ADD:

3977 return true;

3978 return selectG_ADD_SUB(I);

3979 case TargetOpcode::G_UADDO:

3980 case TargetOpcode::G_USUBO:

3981 case TargetOpcode::G_UADDE:

3982 case TargetOpcode::G_USUBE:

3983 return selectG_UADDO_USUBO_UADDE_USUBE(I);

3984 case AMDGPU::G_AMDGPU_MAD_U64_U32:

3985 case AMDGPU::G_AMDGPU_MAD_I64_I32:

3986 return selectG_AMDGPU_MAD_64_32(I);

3987 case TargetOpcode::G_INTTOPTR:

3988 case TargetOpcode::G_BITCAST:

3989 case TargetOpcode::G_PTRTOINT:

3990 case TargetOpcode::G_FREEZE:

3991 return selectCOPY(I);

3992 case TargetOpcode::G_FNEG:

3994 return true;

3995 return selectG_FNEG(I);

3996 case TargetOpcode::G_FABS:

3998 return true;

3999 return selectG_FABS(I);

4000 case TargetOpcode::G_EXTRACT:

4001 return selectG_EXTRACT(I);

4002 case TargetOpcode::G_MERGE_VALUES:

4003 case TargetOpcode::G_CONCAT_VECTORS:

4004 return selectG_MERGE_VALUES(I);

4005 case TargetOpcode::G_UNMERGE_VALUES:

4006 return selectG_UNMERGE_VALUES(I);

4007 case TargetOpcode::G_BUILD_VECTOR:

4008 case TargetOpcode::G_BUILD_VECTOR_TRUNC:

4009 return selectG_BUILD_VECTOR(I);

4010 case TargetOpcode::G_IMPLICIT_DEF:

4011 return selectG_IMPLICIT_DEF(I);

4012 case TargetOpcode::G_INSERT:

4013 return selectG_INSERT(I);

4014 case TargetOpcode::G_INTRINSIC:

4015 case TargetOpcode::G_INTRINSIC_CONVERGENT:

4016 return selectG_INTRINSIC(I);

4017 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:

4018 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:

4019 return selectG_INTRINSIC_W_SIDE_EFFECTS(I);

4020 case TargetOpcode::G_ICMP:

4021 case TargetOpcode::G_FCMP:

4022 if (selectG_ICMP_or_FCMP(I))

4023 return true;

4025 case TargetOpcode::G_LOAD:

4026 case TargetOpcode::G_ZEXTLOAD:

4027 case TargetOpcode::G_SEXTLOAD:

4028 case TargetOpcode::G_STORE:

4029 case TargetOpcode::G_ATOMIC_CMPXCHG:

4030 case TargetOpcode::G_ATOMICRMW_XCHG:

4031 case TargetOpcode::G_ATOMICRMW_ADD:

4032 case TargetOpcode::G_ATOMICRMW_SUB:

4033 case TargetOpcode::G_ATOMICRMW_AND:

4034 case TargetOpcode::G_ATOMICRMW_OR:

4035 case TargetOpcode::G_ATOMICRMW_XOR:

4036 case TargetOpcode::G_ATOMICRMW_MIN:

4037 case TargetOpcode::G_ATOMICRMW_MAX:

4038 case TargetOpcode::G_ATOMICRMW_UMIN:

4039 case TargetOpcode::G_ATOMICRMW_UMAX:

4040 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:

4041 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:

4042 case TargetOpcode::G_ATOMICRMW_FADD:

4043 case TargetOpcode::G_ATOMICRMW_FMIN:

4044 case TargetOpcode::G_ATOMICRMW_FMAX:

4045 return selectG_LOAD_STORE_ATOMICRMW(I);

4046 case TargetOpcode::G_SELECT:

4047 return selectG_SELECT(I);

4048 case TargetOpcode::G_TRUNC:

4049 return selectG_TRUNC(I);

4050 case TargetOpcode::G_SEXT:

4051 case TargetOpcode::G_ZEXT:

4052 case TargetOpcode::G_ANYEXT:

4053 case TargetOpcode::G_SEXT_INREG:

4054

4055

4056

4057 if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&

4059 return true;

4060 return selectG_SZA_EXT(I);

4061 case TargetOpcode::G_FPEXT:

4062 if (selectG_FPEXT(I))

4063 return true;

4065 case TargetOpcode::G_BRCOND:

4066 return selectG_BRCOND(I);

4067 case TargetOpcode::G_GLOBAL_VALUE:

4068 return selectG_GLOBAL_VALUE(I);

4069 case TargetOpcode::G_PTRMASK:

4070 return selectG_PTRMASK(I);

4071 case TargetOpcode::G_EXTRACT_VECTOR_ELT:

4072 return selectG_EXTRACT_VECTOR_ELT(I);

4073 case TargetOpcode::G_INSERT_VECTOR_ELT:

4074 return selectG_INSERT_VECTOR_ELT(I);

4075 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:

4076 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:

4077 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:

4078 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:

4079 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {

4082 assert(Intr && "not an image intrinsic with image pseudo");

4083 return selectImageIntrinsic(I, Intr);

4084 }

4085 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:

4086 return selectBVHIntrinsic(I);

4087 case AMDGPU::G_SBFX:

4088 case AMDGPU::G_UBFX:

4089 return selectG_SBFX_UBFX(I);

4090 case AMDGPU::G_SI_CALL:

4091 I.setDesc(TII.get(AMDGPU::SI_CALL));

4092 return true;

4093 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:

4094 return selectWaveAddress(I);

4095 case AMDGPU::G_STACKRESTORE:

4096 return selectStackRestore(I);

4097 case AMDGPU::G_PHI:

4098 return selectPHI(I);

4099 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:

4100 return selectCOPY_SCC_VCC(I);

4101 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:

4102 return selectCOPY_VCC_SCC(I);

4103 case AMDGPU::G_AMDGPU_READANYLANE:

4104 return selectReadAnyLane(I);

4105 case TargetOpcode::G_CONSTANT:

4106 case TargetOpcode::G_FCONSTANT:

4107 default:

4109 }

4110 return false;

4111}

4112

4114AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

4115 return {{

4117 }};

4118

4119}

4120

4121std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(

4122 Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
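  // Peel source modifiers off the operand: G_FNEG adds NEG, G_FABS adds ABS
  // when AllowAbs is set, and (when IsCanonicalizing) an fsub of the form
  // 0 - x is folded as a negation. The stripped register and accumulated
  // modifier bits are returned for the selectVOP3* callers below.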

4123 unsigned Mods = 0;

4125

4126 if (MI->getOpcode() == AMDGPU::G_FNEG) {

4127 Src = MI->getOperand(1).getReg();

4130 } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {

4131

4132

4135 if (LHS && LHS->isZero()) {

4137 Src = MI->getOperand(2).getReg();

4138 }

4139 }

4140

4141 if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {

4142 Src = MI->getOperand(1).getReg();

4144 }

4145

4146 if (OpSel)

4148

4149 return std::pair(Src, Mods);

4150}

4151

4152Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(

4154 bool ForceVGPR) const {

4155 if ((Mods != 0 || ForceVGPR) &&

4156 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {

4157

4158

4159

4160

4163 TII.get(AMDGPU::COPY), VGPRSrc)

4165 Src = VGPRSrc;

4166 }

4167

4168 return Src;

4169}

4170

4171

4172

4173

4175AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

4176 return {{

4178 }};

4179}

4180

4182AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {

4184 unsigned Mods;

4185 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4186

4187 return {{

4189 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4190 },

4194 }};

4195}

4196

4198AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {

4200 unsigned Mods;

4201 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4202 true,

4203 false);

4204

4205 return {{

4207 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4208 },

4212 }};

4213}

4214

4216AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

4217 return {{

4221 }};

4222}

4223

4225AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {

4227 unsigned Mods;

4228 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4229

4230 return {{

4232 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4233 },

4235 }};

4236}

4237

4239AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(

4242 unsigned Mods;

4243 std::tie(Src, Mods) =

4244 selectVOP3ModsImpl(Root.getReg(), false);

4245

4246 return {{

4248 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4249 },

4251 }};

4252}

4253

4255AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {

4257 unsigned Mods;

4258 std::tie(Src, Mods) =

4259 selectVOP3ModsImpl(Root.getReg(), true,

4260 false);

4261

4262 return {{

4264 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4265 },

4267 }};

4268}

4269

4271AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {

4274 if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)

4275 return {};

4276 return {{

4278 }};

4279}

4280

4281std::pair<Register, unsigned>

4282AMDGPUInstructionSelector::selectVOP3PModsImpl(

4284 unsigned Mods = 0;

4286

4287 if (MI->getOpcode() == AMDGPU::G_FNEG &&

4288

4289

4292 Src = MI->getOperand(1).getReg();

4293 MI = MRI.getVRegDef(Src);

4294 }

4295

4296

4297

4298

4299 (void)IsDOT;

4300

4301

4303

4304 return std::pair(Src, Mods);

4305}

4306

4308AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {

4311

4313 unsigned Mods;

4314 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

4315

4316 return {{

4319 }};

4320}

4321

4323AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {

4326

4328 unsigned Mods;

4329 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

4330

4331 return {{

4334 }};

4335}

4336

4338AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {

4339

4340

4341

4343 "expected i1 value");

4345 if (Root.getImm() == -1)

4347 return {{

4349 }};

4350}

4351

4353AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(

4356 "expected i1 value");

4358 if (Root.getImm() != 0)

4360

4361 return {{

4363 }};

4364}

4365

4370 switch (Elts.size()) {

4371 case 8:

4372 DstRegClass = &AMDGPU::VReg_256RegClass;

4373 break;

4374 case 4:

4375 DstRegClass = &AMDGPU::VReg_128RegClass;

4376 break;

4377 case 2:

4378 DstRegClass = &AMDGPU::VReg_64RegClass;

4379 break;

4380 default:

4382 }

4383

4385 auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)

4386 .addDef(MRI.createVirtualRegister(DstRegClass));

4387 for (unsigned i = 0; i < Elts.size(); ++i) {

4388 MIB.addReg(Elts[i]);

4390 }

4392}

4393

4398 if (ModOpcode == TargetOpcode::G_FNEG) {

4400

4402 for (auto El : Elts) {

4405 break;

4407 }

4408 if (Elts.size() != NegAbsElts.size()) {

4409

4411 } else {

4412

4415 }

4416 } else {

4417 assert(ModOpcode == TargetOpcode::G_FABS);

4418

4421 }

4422}

4423

4425AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {

4429

4430 if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {

4431 assert(BV->getNumSources() > 0);

4432

4433 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));

4434 unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)

4435 ? AMDGPU::G_FNEG

4436 : AMDGPU::G_FABS;

4437 for (unsigned i = 0; i < BV->getNumSources(); ++i) {

4438 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));

4439 if (ElF32->getOpcode() != ModOpcode)

4440 break;

4442 }

4443

4444

4445 if (BV->getNumSources() == EltsF32.size()) {

4447 *MRI);

4448 }

4449 }

4450

4453}

4454

4456AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {

4460

4461 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {

4462 for (unsigned i = 0; i < CV->getNumSources(); ++i) {

4465 break;

4467 }

4468

4469

4470 if (CV->getNumSources() == EltsV2F16.size()) {

4474 }

4475 }

4476

4479}

4480

4482AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {

4486

4487 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {

4488 assert(CV->getNumSources() > 0);

4489 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));

4490

4491 unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)

4492 ? AMDGPU::G_FNEG

4493 : AMDGPU::G_FABS;

4494

4495 for (unsigned i = 0; i < CV->getNumSources(); ++i) {

4496 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));

4497 if (ElV2F16->getOpcode() != ModOpcode)

4498 break;

4500 }

4501

4502

4503 if (CV->getNumSources() == EltsV2F16.size()) {

4506 *MRI);

4507 }

4508 }

4509

4512}

4513

4515AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {

4516 std::optional<FPValueAndVReg> FPValReg;

4520 MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

4521 }}};

4522 }

4523

4524

4525 return {};

4526 }

4527

4531 return {

4533 }

4534 }

4535

4536 return {};

4537}

4538

4540AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {

4543 unsigned Key = 0;

4544

4546 std::optional<ValueAndVReg> ShiftAmt;

4548 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&

4549 ShiftAmt->Value.getZExtValue() % 8 == 0) {

4550 Key = ShiftAmt->Value.getZExtValue() / 8;

4551 Src = ShiftSrc;

4552 }

4553

4554 return {{

4557 }};

4558}

4559

4561AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {

4562

4565 unsigned Key = 0;

4566

4568 std::optional<ValueAndVReg> ShiftAmt;

4570 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&

4571 ShiftAmt->Value.getZExtValue() == 16) {

4572 Src = ShiftSrc;

4573 Key = 1;

4574 }

4575

4576 return {{

4579 }};

4580}

4581

4583AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {

4585 unsigned Mods;

4586 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4587

4588

4589 return {{

4592 }};

4593}

4594

4596AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {

4598 unsigned Mods;

4599 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4600 true,

4601 false,

4602 false);

4603

4604 return {{

4607 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

4608 },

4610 }};

4611}

4612

4614AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {

4616 unsigned Mods;

4617 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4618 true,

4619 false,

4620 true);

4621

4622 return {{

4625 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

4626 },

4628 }};

4629}

4630

4631bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,

4634 int64_t *Offset) const {

4637

4638

4639

4641 getAddrModeInfo(*MI, *MRI, AddrInfo);

4642

4643 if (AddrInfo.empty())

4644 return false;

4645

4646 const GEPInfo &GEPI = AddrInfo[0];

4647 std::optional<int64_t> EncodedImm;

4648

4649 if (SOffset && Offset) {

4651 true);

4652 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&

4653 AddrInfo.size() > 1) {

4654 const GEPInfo &GEPI2 = AddrInfo[1];

4655 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {

4658 Base = GEPI2.SgprParts[0];

4659 *SOffset = OffsetReg;

4660 *Offset = *EncodedImm;

4662 return true;

4663

4664

4665

4666

4667

4669 if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

4670 return false;

4671

4672 return true;

4673 }

4674 }

4675 }

4676 return false;

4677 }

4678

4680 false);

4681 if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {

4682 Base = GEPI.SgprParts[0];

4683 *Offset = *EncodedImm;

4684 return true;

4685 }

4686

4687

4688 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&

4689 GEPI.Imm != 0) {

4690

4691

4692

4693

4694 Base = GEPI.SgprParts[0];

4695 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

4696 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

4698 return true;

4699 }

4700

4701 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {

4703 Base = GEPI.SgprParts[0];

4704 *SOffset = OffsetReg;

4705 return true;

4706 }

4707 }

4708

4709 return false;

4710}

4711

4713AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {

4716 if (!selectSmrdOffset(Root, Base, nullptr, &Offset))

4717 return std::nullopt;

4718

4721}

4722

4724AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {

4726 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

4727

4728 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)

4729 return std::nullopt;

4730

4731 const GEPInfo &GEPInfo = AddrInfo[0];

4732 Register PtrReg = GEPInfo.SgprParts[0];

4733 std::optional<int64_t> EncodedImm =

4735 if (!EncodedImm)

4736 return std::nullopt;

4737

4738 return {{

4741 }};

4742}

4743

4745AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {

4747 if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))

4748 return std::nullopt;

4749

4752}

4753

4755AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {

4758 if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))

4759 return std::nullopt;

4760

4764}

4765

4766std::pair<Register, int>

4767AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,

4768 uint64_t FlatVariant) const {

4770

4772

4775

4777 int64_t ConstOffset;

4778 std::tie(PtrBase, ConstOffset) =

4779 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

4780

4782 !isFlatScratchBaseLegal(Root.getReg())))

4784

4785 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();

4786 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))

4788

4789 return std::pair(PtrBase, ConstOffset);

4790}

4791

4793AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

4794 auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

4795

4796 return {{

4799 }};

4800}

4801

4803AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

4805

4806 return {{

4809 }};

4810}

4811

4813AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {

4815

4816 return {{

4819 }};

4820}

4821

4822

4824AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
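  // Global SADDR matching: a legal constant offset is folded into the
  // immediate field, an oversized one is split and its remainder materialized
  // into a VGPR with V_MOV_B32; an SGPR base added to a VGPR offset is used
  // directly, and a plain SGPR address gets a zero VGPR offset operand.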

4827 int64_t ConstOffset;

4828 int64_t ImmOffset = 0;

4829

4830

4831

4832 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

4833

4834 if (ConstOffset != 0) {

4837 Addr = PtrBase;

4838 ImmOffset = ConstOffset;

4839 } else {

4841 if (isSGPR(PtrBaseDef->Reg)) {

4842 if (ConstOffset > 0) {

4843

4844

4845

4846

4847

4848 int64_t SplitImmOffset, RemainderOffset;

4849 std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(

4851

4852 if (isUInt<32>(RemainderOffset)) {

4856 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

4857

4858 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

4859 HighBits)

4860 .addImm(RemainderOffset);

4861

4862 return {{

4865 MIB.addReg(HighBits);

4866 },

4868 }};

4869 }

4870 }

4871

4872

4873

4874

4875

4876

4877 unsigned NumLiterals =

4881 return std::nullopt;

4882 }

4883 }

4884 }

4885

4886

4888 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

4889

4892

4893 if (isSGPR(SAddr)) {

4894 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

4895

4896

4897

4901 },

4903 MIB.addReg(VOffset);

4904 },

4906 MIB.addImm(ImmOffset);

4907 }}};

4908 }

4909 }

4910 }

4911

4912

4913

4914 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||

4915 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))

4916 return std::nullopt;

4917

4918

4919

4922 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

4923

4924 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

4926

4927 return {{

4931 }};

4932}

4933

4935AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {

4938 int64_t ConstOffset;

4939 int64_t ImmOffset = 0;

4940

4941

4942

4943 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

4944

4945 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&

4948 Addr = PtrBase;

4949 ImmOffset = ConstOffset;

4950 }

4951

4953 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {

4954 int FI = AddrDef->MI->getOperand(1).getIndex();

4955 return {{

4958 }};

4959 }

4960

4961 Register SAddr = AddrDef->Reg;

4962

4963 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

4964 Register LHS = AddrDef->MI->getOperand(1).getReg();

4965 Register RHS = AddrDef->MI->getOperand(2).getReg();

4968

4969 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&

4970 isSGPR(RHSDef->Reg)) {

4971 int FI = LHSDef->MI->getOperand(1).getIndex();

4975 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

4976

4977 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

4979 .addReg(RHSDef->Reg)

4981 }

4982 }

4983

4984 if (!isSGPR(SAddr))

4985 return std::nullopt;

4986

4987 return {{

4990 }};

4991}

4992

4993
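  // With the flat-scratch SVS swizzle bug, a scratch access must not carry
  // out of the low two address bits: reject the addressing mode when the
  // maximum low-2-bit contributions of the VGPR and SGPR addends can sum to
  // 4 or more.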

4994bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(

4997 return false;

4998

4999

5000

5001

5005 uint64_t VMax = VKnown.getMaxValue().getZExtValue();

5006 uint64_t SMax = SKnown.getMaxValue().getZExtValue();

5007 return (VMax & 3) + (SMax & 3) >= 4;

5008}

5009

5011AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {

5014 int64_t ConstOffset;

5015 int64_t ImmOffset = 0;

5016

5017

5018

5019 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

5020

5022 if (ConstOffset != 0 &&

5024 Addr = PtrBase;

5025 ImmOffset = ConstOffset;

5026 }

5027

5029 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)

5030 return std::nullopt;

5031

5032 Register RHS = AddrDef->MI->getOperand(2).getReg();

5033 if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)

5034 return std::nullopt;

5035

5036 Register LHS = AddrDef->MI->getOperand(1).getReg();

5038

5039 if (OrigAddr != Addr) {

5040 if (!isFlatScratchBaseLegalSVImm(OrigAddr))

5041 return std::nullopt;

5042 } else {

5043 if (!isFlatScratchBaseLegalSV(OrigAddr))

5044 return std::nullopt;

5045 }

5046

5047 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))

5048 return std::nullopt;

5049

5050 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {

5051 int FI = LHSDef->MI->getOperand(1).getIndex();

5052 return {{

5056 }};

5057 }

5058

5059 if (!isSGPR(LHS))

5060 return std::nullopt;

5061

5062 return {{

5066 }};

5067}

5068

5070AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {

5075

5079 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

5080

5081

5082

5084 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

5085 HighBits)

5087

5089 MIB.addReg(Info->getScratchRSrcReg());

5090 },

5092 MIB.addReg(HighBits);

5093 },

5095

5096

5098 },

5101 }}};

5102 }

5103

5105

5106

5107

5108 std::optional<int> FI;

5110

5113 int64_t ConstOffset;

5114 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);

5115 if (ConstOffset != 0) {

5119 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);

5120 if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)

5122 else

5123 VAddr = PtrBase;

5124 Offset = ConstOffset;

5125 }

5126 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {

5128 }

5129

5131 MIB.addReg(Info->getScratchRSrcReg());

5132 },

5134 if (FI)

5136 else

5138 },

5140

5141

5143 },

5146 }}};

5147}

5148

5149bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,

5150 int64_t Offset) const {

5151 if (!isUInt<16>(Offset))

5152 return false;

5153

5155 return true;

5156

5157

5158

5160}

5161

5162bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,

5163 int64_t Offset1,

5164 unsigned Size) const {

5165 if (Offset0 % Size != 0 || Offset1 % Size != 0)

5166 return false;

5167 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))

5168 return false;

5169

5171 return true;

5172

5173

5174

5176}

5177

5178

5180 return Addr->getOpcode() == TargetOpcode::G_OR ||

5181 (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

5183}

5184

5185

5186

5187

5188bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {

5190

5192 return true;

5193

5194

5195

5197 return true;

5198

5201

5202 if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {

5203 std::optional<ValueAndVReg> RhsValReg =

5205

5206

5207

5208

5209 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&

5210 RhsValReg->Value.getSExtValue() > -0x40000000)

5211 return true;

5212 }

5213

5215}

5216

5217

5218

5219bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {

5221

5223 return true;

5224

5225

5226

5228 return true;

5229

5233}

5234

5235

5236

5237bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(

5239

5240

5242 return true;

5243

5246 std::optional<DefinitionAndSourceRegister> BaseDef =

5248 std::optional<ValueAndVReg> RHSOffset =

5251

5252

5253

5254

5255

5258 (RHSOffset->Value.getSExtValue() < 0 &&

5259 RHSOffset->Value.getSExtValue() > -0x40000000)))

5260 return true;

5261

5262 Register LHS = BaseDef->MI->getOperand(1).getReg();

5263 Register RHS = BaseDef->MI->getOperand(2).getReg();

5265}

5266

5267bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,

5268 unsigned ShAmtBits) const {

5269 assert(MI.getOpcode() == TargetOpcode::G_AND);

5270

5271 std::optional<APInt> RHS =

5273 if (!RHS)

5274 return false;

5275

5276 if (RHS->countr_one() >= ShAmtBits)

5277 return true;

5278

5280 return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;

5281}

5282

5284AMDGPUInstructionSelector::selectMUBUFScratchOffset(

5288

5289 std::optional<DefinitionAndSourceRegister> Def =

5291 assert(Def && "this shouldn't be an optional result");

5293

5295 return {{

5297 MIB.addReg(Info->getScratchRSrcReg());

5298 },

5300 MIB.addReg(WaveBase);

5301 },

5303 }};

5304 }

5305

5307

5308

5314 return {};

5317 if (!WaveBase)

5318 return {};

5319

5320 return {{

5322 MIB.addReg(Info->getScratchRSrcReg());

5323 },

5325 MIB.addReg(WaveBase);

5326 },

5328 }};

5329 }

5330

5333 return {};

5334

5335 return {{

5337 MIB.addReg(Info->getScratchRSrcReg());

5338 },

5341 },

5343 }};

5344}

5345

5346std::pair<Register, unsigned>

5347AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
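  // DS addressing: when the root is a G_PTR_ADD with a constant offset that
  // passes isDSOffsetLegal, return the base register plus that immediate;
  // otherwise fall back to the raw pointer with offset 0.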

5349 int64_t ConstAddr = 0;

5350

5353 std::tie(PtrBase, Offset) =

5354 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

5355

5357 if (isDSOffsetLegal(PtrBase, Offset)) {

5358

5359 return std::pair(PtrBase, Offset);

5360 }

5361 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

5362

5363

5364

5366

5367

5368 }

5369

5370 return std::pair(Root.getReg(), 0);

5371}

5372

5374AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {

5377 std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

5378 return {{

5381 }};

5382}

5383

5385AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {

5386 return selectDSReadWrite2(Root, 4);

5387}

5388

5390AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {

5391 return selectDSReadWrite2(Root, 8);

5392}

5393

5395AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,

5396 unsigned Size) const {

5399 std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

5400 return {{

5404 }};

5405}

5406

5407std::pair<Register, unsigned>

5408AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,

5409 unsigned Size) const {

5411 int64_t ConstAddr = 0;

5412

5415 std::tie(PtrBase, Offset) =

5416 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

5417

5419 int64_t OffsetValue0 = Offset;

5420 int64_t OffsetValue1 = Offset + Size;

5421 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {

5422

5423 return std::pair(PtrBase, OffsetValue0 / Size);

5424 }

5425 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

5426

5427

5429

5430

5431 }

5432

5433 return std::pair(Root.getReg(), 0);

5434}

5435

5436

5437

5438

5439

5440std::pair<Register, int64_t>

5441AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(

5444 if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)

5445 return {Root, 0};

5446

5448 std::optional MaybeOffset =

5450 if (!MaybeOffset)

5451 return {Root, 0};

5452 return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};

5453}

5454

5457}

5458

5459

5460

5464 Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

5465 Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

5466 Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

5467 Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

5468

5469 B.buildInstr(AMDGPU::S_MOV_B32)

5470 .addDef(RSrc2)

5471 .addImm(FormatLo);

5472 B.buildInstr(AMDGPU::S_MOV_B32)

5473 .addDef(RSrc3)

5474 .addImm(FormatHi);

5475

5476

5477

5478

5479 B.buildInstr(AMDGPU::REG_SEQUENCE)

5480 .addDef(RSrcHi)

5481 .addReg(RSrc2)

5482 .addImm(AMDGPU::sub0)

5483 .addReg(RSrc3)

5484 .addImm(AMDGPU::sub1);

5485

5487 if (!BasePtr) {

5488 RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

5489 B.buildInstr(AMDGPU::S_MOV_B64)

5490 .addDef(RSrcLo)

5491 .addImm(0);

5492 }

5493

5494 B.buildInstr(AMDGPU::REG_SEQUENCE)

5495 .addDef(RSrc)

5496 .addReg(RSrcLo)

5497 .addImm(AMDGPU::sub0_sub1)

5498 .addReg(RSrcHi)

5499 .addImm(AMDGPU::sub2_sub3);

5500

5501 return RSrc;

5502}

5503

5506 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5507

5508

5509

5511}

5512

5515 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5516

5517

5518

5520}

5521

5522AMDGPUInstructionSelector::MUBUFAddressData

5523AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {

5524 MUBUFAddressData Data;

5525 Data.N0 = Src;

5526

5529

5530 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);

5531 if (isUInt<32>(Offset)) {

5532 Data.N0 = PtrBase;

5534 }

5535

5538 Data.N2 = InputAdd->getOperand(1).getReg();

5539 Data.N3 = InputAdd->getOperand(2).getReg();

5540

5541

5542

5543

5544

5545

5548 }

5549

5550 return Data;

5551}

5552

5553

5554bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {

5555

5556

5557 if (Addr.N2)

5558 return true;

5559

5561 return N0Bank->getID() == AMDGPU::VGPRRegBankID;

5562}

5563

5564

5565

5566

5567void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(

5570 return;

5571

5572

5573 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5574 B.buildInstr(AMDGPU::S_MOV_B32)

5575 .addDef(SOffset)

5576 .addImm(ImmOffset);

5577 ImmOffset = 0;

5578}

5579

5580bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(

5583

5584

5586 return false;

5587

5588 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());

5589 if (!shouldUseAddr64(AddrData))

5590 return false;

5591

5595 Offset = AddrData.Offset;

5596

5597

5599

5600 if (N2) {

5601 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5603 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5604

5605

5606 VAddr = N0;

5607 } else {

5608 SRDPtr = N3;

5609 VAddr = N2;

5610 }

5611 } else {

5612

5613 SRDPtr = N2;

5614 VAddr = N3;

5615 }

5616 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5617

5618 VAddr = N0;

5619 } else {

5620

5621

5622 SRDPtr = N0;

5623 }

5624

5627 splitIllegalMUBUFOffset(B, SOffset, Offset);

5628 return true;

5629}

5630

5631bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
5632 MachineOperand &Root, Register &RSrcReg, Register &SOffset,
5633 int64_t &Offset) const {
5634
5635
5636 if (STI.useFlatForGlobal())
5637 return false;

5638

5639 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());

5640 if (shouldUseAddr64(AddrData))

5641 return false;

5642

5643

5644

5645 Register SRDPtr = AddrData.N0;

5646 Offset = AddrData.Offset;

5647

5648
5649 MachineIRBuilder B(*Root.getParent());
5650
5651 RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
5652 splitIllegalMUBUFOffset(B, SOffset, Offset);

5653 return true;

5654}

5655

5656InstructionSelector::ComplexRendererFns
5657AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
5658 Register VAddr;
5659 Register RSrcReg;
5660 Register SOffset;
5661 int64_t Offset = 0;
5662
5663 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
5664 return {};
5665
5666
5667
5668 return {{
5669 [=](MachineInstrBuilder &MIB) { // rsrc
5670 MIB.addReg(RSrcReg);
5671 },
5672 [=](MachineInstrBuilder &MIB) { // vaddr
5673 MIB.addReg(VAddr);
5674 },
5675 [=](MachineInstrBuilder &MIB) { // soffset
5676 if (SOffset)
5677 MIB.addReg(SOffset);
5678 else if (STI.hasRestrictedSOffset())
5679 MIB.addReg(AMDGPU::SGPR_NULL);
5680 else
5681 MIB.addImm(0);
5682 },
5683 [=](MachineInstrBuilder &MIB) { // offset
5684 MIB.addImm(Offset);
5685 },
5686 addZeroImm, // cpol
5687 addZeroImm, // tfe
5688 addZeroImm // swz
5689 }};
5690}
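As with the other complex-operand selectors in this file, selectMUBUFAddr64 returns a list of small callbacks that are later applied in order to append rsrc, vaddr, soffset, offset, and the trailing zero immediates to the instruction being built. A rough standalone sketch of that callback pattern, using plain std::function instead of the real ComplexRendererFns type:

#include <functional>
#include <iostream>
#include <vector>

int main() {
  // Each renderer appends one operand; here they only print what they would add.
  std::vector<std::function<void()>> Renderers = {
      [] { std::cout << "addReg(RSrcReg)\n"; },
      [] { std::cout << "addReg(VAddr)\n"; },
      [] { std::cout << "addImm(0) // soffset\n"; },
      [] { std::cout << "addImm(Offset)\n"; }};
  for (auto &Fn : Renderers)
    Fn(); // the selector applies these to a MachineInstrBuilder instead
  return 0;
}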

5691

5692InstructionSelector::ComplexRendererFns
5693AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
5694 Register RSrcReg;
5695 Register SOffset;
5696 int64_t Offset = 0;
5697
5698 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
5699 return {};
5700
5701 return {{
5702 [=](MachineInstrBuilder &MIB) { // rsrc
5703 MIB.addReg(RSrcReg);
5704 },
5705 [=](MachineInstrBuilder &MIB) { // soffset
5706 if (SOffset)
5707 MIB.addReg(SOffset);
5708 else if (STI.hasRestrictedSOffset())
5709 MIB.addReg(AMDGPU::SGPR_NULL);
5710 else
5711 MIB.addImm(0);
5712 },
5713 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
5714 addZeroImm, // cpol
5715 addZeroImm, // tfe
5716 addZeroImm // swz
5717 }};
5718}

5719

5720InstructionSelector::ComplexRendererFns
5721AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
5722
5723 Register SOffset = Root.getReg();
5724
5725 if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
5726 SOffset = AMDGPU::SGPR_NULL;
5727
5728 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
5729}

5730

5731/// Get an immediate that must be 32-bits, and treated as zero extended.
5732static std::optional<uint64_t>
5733getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
5734
5735 std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
5736 if (!OffsetVal || !isInt<32>(*OffsetVal))
5737 return std::nullopt;
5738 return Lo_32(*OffsetVal);

5739}

5740

5741InstructionSelector::ComplexRendererFns
5742AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
5743 std::optional<uint64_t> OffsetVal =
5744 Root.isImm() ? Root.getImm() : getConstantZext32Val(Root.getReg(), *MRI);
5745 if (!OffsetVal)
5746 return {};
5747
5748 std::optional<int64_t> EncodedImm =
5749 AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true);
5750 if (!EncodedImm)
5751 return {};
5752
5753 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
5754}

5755

5756InstructionSelector::ComplexRendererFns
5757AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
5758 assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
5759
5760 std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
5761 if (!OffsetVal)
5762 return {};
5763
5764 std::optional<int64_t> EncodedImm =
5765 AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
5766 if (!EncodedImm)
5767 return {};
5768
5769 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
5770}

5771

5772InstructionSelector::ComplexRendererFns
5773AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5774
5775
5776 Register SOffset;
5777 unsigned Offset;
5778 std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
5779 *MRI, Root.getReg(), KB, true);
5780 if (!SOffset)
5781 return std::nullopt;
5782
5783 std::optional<int64_t> EncodedOffset =
5784 AMDGPU::getSMRDEncodedOffset(STI, Offset, true, true);
5785 if (!EncodedOffset)
5786 return std::nullopt;
5787
5788 assert(MRI->getType(SOffset) == LLT::scalar(32));
5789 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
5790 [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
5791}

5792

5793std::pair<Register, unsigned>

5794AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,

5795 bool &Matched) const {

5796 Matched = false;

5797
5798 Register Src;
5799 unsigned Mods;

5800 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

5801
5802 if (mi_match(Src, *MRI, m_GFPExt(m_Reg(Src)))) {
5804
5805
5806
5807
5808 Src = stripBitCast(Src, *MRI);
5809

5810 const auto CheckAbsNeg = [&]() {

5811
5812
5813 if ((Mods & SISrcMods::ABS) == 0) {
5814 unsigned ModsTmp;
5815 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
5816
5817 if ((ModsTmp & SISrcMods::NEG) != 0)
5818 Mods ^= SISrcMods::NEG;
5819
5820 if ((ModsTmp & SISrcMods::ABS) != 0)
5821 Mods |= SISrcMods::ABS;
5822 }

5823 };

5824

5825 CheckAbsNeg();

5826

5827

5828

5829

5830

5831
5832 Mods |= SISrcMods::OP_SEL_1;
5833
5834 if (isExtractHiElt(*MRI, Src, Src)) {
5835 Mods |= SISrcMods::OP_SEL_0;

5836 CheckAbsNeg();

5837 }

5838

5839 Matched = true;

5840 }

5841

5842 return {Src, Mods};

5843}

5844

5845InstructionSelector::ComplexRendererFns
5846AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
5847 MachineOperand &Root) const {
5848 Register Src;
5849 unsigned Mods;
5850 bool Matched;
5851 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5852 if (!Matched)
5853 return {};
5854
5855 return {{
5856 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5857 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5858 }};
5859}

5860

5861InstructionSelector::ComplexRendererFns
5862AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
5863 Register Src;
5864 unsigned Mods;
5865 bool Matched;
5866 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5867
5868 return {{
5869 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5870 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5871 }};

5872}

5873

5874bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
5875 MachineInstr &I, Intrinsic::ID IntrID) const {
5876 MachineBasicBlock *MBB = I.getParent();
5877 const DebugLoc &DL = I.getDebugLoc();
5878 Register CCReg = I.getOperand(0).getReg();

5879

5880 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))

5881 .addImm(I.getOperand(2).getImm());

5882

5883 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

5884

5885 I.eraseFromParent();
5886 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
5887 *MRI);

5888}

5889

5890bool AMDGPUInstructionSelector::selectSGetBarrierState(
5891 MachineInstr &I, Intrinsic::ID IntrID) const {
5892 MachineBasicBlock *MBB = I.getParent();
5893 const DebugLoc &DL = I.getDebugLoc();
5894 MachineOperand BarOp = I.getOperand(2);
5895 std::optional<int64_t> BarValImm =
5896 getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
5897

5898 if (!BarValImm) {

5899 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

5902 }

5903 MachineInstrBuilder MIB;
5904 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
5905 : AMDGPU::S_GET_BARRIER_STATE_M0;
5906 MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
5907
5908 auto DstReg = I.getOperand(0).getReg();
5909 const TargetRegisterClass *DstRC =
5910 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
5911 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
5912 return false;
5913 MIB.addDef(DstReg);
5914 if (BarValImm) {
5915 MIB.addImm(*BarValImm);

5916 }

5917 I.eraseFromParent();

5918 return true;

5919}

5920
5921unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
5922 if (HasInlineConst) {

5923 switch (IntrID) {

5924 default:

5926 case Intrinsic::amdgcn_s_barrier_join:

5927 return AMDGPU::S_BARRIER_JOIN_IMM;

5928 case Intrinsic::amdgcn_s_get_named_barrier_state:

5929 return AMDGPU::S_GET_BARRIER_STATE_IMM;

5930 };

5931 } else {

5932 switch (IntrID) {

5933 default:

5935 case Intrinsic::amdgcn_s_barrier_join:

5936 return AMDGPU::S_BARRIER_JOIN_M0;

5937 case Intrinsic::amdgcn_s_get_named_barrier_state:

5938 return AMDGPU::S_GET_BARRIER_STATE_M0;

5939 };

5940 }

5941}

5942

5943bool AMDGPUInstructionSelector::selectNamedBarrierInit(
5944 MachineInstr &I, Intrinsic::ID IntrID) const {
5945 MachineBasicBlock *MBB = I.getParent();
5946 const DebugLoc &DL = I.getDebugLoc();
5947 MachineOperand BarOp = I.getOperand(1);
5948 MachineOperand CntOp = I.getOperand(2);
5949

5950

5951 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5952 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)

5953 .add(BarOp)

5956

5957 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5958 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)

5962

5963

5964 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5965 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg2)

5966 .add(CntOp)

5969

5970 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5971 constexpr unsigned ShAmt = 16;

5972 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg3)

5976

5977 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5978 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg4)

5982

5983 auto CopyMIB =

5984 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);
5985 constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
5986

5987 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init

5988 ? AMDGPU::S_BARRIER_INIT_M0

5989 : AMDGPU::S_BARRIER_SIGNAL_M0;
5990 MachineInstrBuilder MIB;
5991 MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
5992

5993 I.eraseFromParent();

5994 return true;

5995}

5996

5997bool AMDGPUInstructionSelector::selectNamedBarrierInst(
5998 MachineInstr &I, Intrinsic::ID IntrID) const {
5999 MachineBasicBlock *MBB = I.getParent();
6000 const DebugLoc &DL = I.getDebugLoc();
6001 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state

6002 ? I.getOperand(2)

6003 : I.getOperand(1);

6004 std::optional<int64_t> BarValImm =

6006

6007 if (!BarValImm) {

6008

6009 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

6010 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)

6014

6015 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

6016 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)

6020

6021 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6022 .addReg(TmpReg1);
6023 constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
6024 }

6025
6026 MachineInstrBuilder MIB;
6027 unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
6028 MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
6029

6030 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {

6031 auto DstReg = I.getOperand(0).getReg();
6032 const TargetRegisterClass *DstRC =
6033 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6034 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6035 return false;
6036 MIB.addDef(DstReg);
6037 }

6038

6039 if (BarValImm) {

6040 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6041 MIB.addImm(BarId);
6042 }

6043

6044 I.eraseFromParent();

6045 return true;

6046}

6047

6048void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
6049 const MachineInstr &MI,
6050 int OpIdx) const {

6051 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

6052 "Expected G_CONSTANT");

6053 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

6054}

6055

6056void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
6057 const MachineInstr &MI,
6058 int OpIdx) const {

6059 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

6060 "Expected G_CONSTANT");

6061 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

6062}

6063

6064void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
6065 const MachineInstr &MI,
6066 int OpIdx) const {
6067 const MachineOperand &Op = MI.getOperand(1);
6068 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);

6069 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

6070}

6071

6072void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
6073 const MachineInstr &MI,
6074 int OpIdx) const {

6075 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

6076 "Expected G_CONSTANT");

6077 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

6078}

6079

6080

6081

6082void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
6083 const MachineInstr &MI,
6084 int OpIdx) const {

6086 int64_t Imm;

6089 else

6091}

6092

6093void AMDGPUInstructionSelector::renderZextBoolTImm(MachineInstrBuilder &MIB,
6094 const MachineInstr &MI,
6095 int OpIdx) const {

6096 MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);

6097}

6098

6099void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
6100 const MachineInstr &MI,
6101 int OpIdx) const {
6102 assert(OpIdx >= 0 && "expected to match an immediate operand");
6103 MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
6104}

6105

6106void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(

6108 assert(OpIdx >= 0 && "expected to match an immediate operand");

6111}

6112

6113void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(

6115 assert(OpIdx >= 0 && "expected to match an immediate operand");

6116 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)

6119}

6120

6121void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(

6123 assert(OpIdx >= 0 && "expected to match an immediate operand");

6126}

6127

6128void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(

6130 assert(OpIdx >= 0 && "expected to match an immediate operand");

6131 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)

6133 : 0);

6134}

6135

6136void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(

6138 assert(OpIdx >= 0 && "expected to match an immediate operand");

6140 : 0);

6141}

6142

6143void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(

6145 assert(OpIdx >= 0 && "expected to match an immediate operand");

6147 : 0);

6148}

6149

6150void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(

6152 assert(OpIdx >= 0 && "expected to match an immediate operand");

6155}

6156

6157void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(

6159 assert(OpIdx >= 0 && "expected to match an immediate operand");

6162}

6163

6164void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
6165 const MachineInstr &MI,
6166 int OpIdx) const {

6167 assert(OpIdx >= 0 && "expected to match an immediate operand");

6168 MIB.addImm(MI.getOperand(OpIdx).getImm() &

6171}

6172

6173void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
6174 const MachineInstr &MI,
6175 int OpIdx) const {

6176 assert(OpIdx >= 0 && "expected to match an immediate operand");

6177 const bool Swizzle = MI.getOperand(OpIdx).getImm() &

6181}

6182

6183void AMDGPUInstructionSelector::renderExtractCpolSetGLC(

6185 assert(OpIdx >= 0 && "expected to match an immediate operand");

6186 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

6190}

6191

6192void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
6193 const MachineInstr &MI,
6194 int OpIdx) const {
6195 MIB.addFrameIndex(MI.getOperand(1).getIndex());
6196}

6197

6198void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
6199 const MachineInstr &MI,
6200 int OpIdx) const {
6201 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
6202 int ExpVal = APF.getExactLog2Abs();
6203 assert(ExpVal != INT_MIN);
6204 MIB.addImm(ExpVal);
6205}

6206

6207void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
6208 const MachineInstr &MI,
6209 int OpIdx) const {
6210 // "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
6211 // "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
6212 // "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
6213 // "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEG_INF 2

6214 MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);

6215}
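The (Imm + 3) % 4 above rotates the four rounding-mode encodings: with the source values listed in the comments (TowardZero = 0 through TowardNegative = 3), 0 maps to 3, 1 to 0, 2 to 1, and 3 to 2. A one-line sketch of the same arithmetic, for illustration only:

// remap(0)==3, remap(1)==0, remap(2)==1, remap(3)==2
constexpr int remapRoundMode(int Imm) { return (Imm + 3) % 4; }
static_assert(remapRoundMode(0) == 3 && remapRoundMode(1) == 0 &&
              remapRoundMode(2) == 1 && remapRoundMode(3) == 2,
              "rotation by +3 mod 4");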

6216

6217

6218void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
6219 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6220 unsigned Val = MI.getOperand(OpIdx).getImm();

6221 unsigned New = 0;

6222 if (Val & 0x1)

6224 if (Val & 0x2)

6227}

6228

6229bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
6230 return TII.isInlineConstant(Imm);
6231}

6232

6233bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
6234 return TII.isInlineConstant(Imm);
6235}
