LLVM: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Source File


29#include "llvm/IR/IntrinsicsAMDGPU.h"

30#include <optional>

31

32#define DEBUG_TYPE "amdgpu-isel"

33

34using namespace llvm;

35using namespace MIPatternMatch;

36

37#define GET_GLOBALISEL_IMPL

38#define AMDGPUSubtarget GCNSubtarget

39#include "AMDGPUGenGlobalISel.inc"

40#undef GET_GLOBALISEL_IMPL

41#undef AMDGPUSubtarget

42

43AMDGPUInstructionSelector::AMDGPUInstructionSelector(
44 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
45 const AMDGPUTargetMachine &TM)
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
47 STI(STI),
48#define GET_GLOBALISEL_PREDICATES_INIT
49#include "AMDGPUGenGlobalISel.inc"
50#undef GET_GLOBALISEL_PREDICATES_INIT
51#define GET_GLOBALISEL_TEMPORARIES_INIT
52#include "AMDGPUGenGlobalISel.inc"
53#undef GET_GLOBALISEL_TEMPORARIES_INIT
54{
55}

56

58

67}

68

69

71 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
72 ? Def->getOperand(1).getReg()
73 : Register();
74}

75

76bool AMDGPUInstructionSelector::isVCC(Register Reg,
77 const MachineRegisterInfo &MRI) const {
78
79 if (Reg.isPhysical())
80 return false;
81
82 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
83 const TargetRegisterClass *RC =
84 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
85 if (RC) {
86 const LLT Ty = MRI.getType(Reg);
87 if (!Ty.isValid() || Ty.getSizeInBits() != 1)
88 return false;
89
90 return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
91 RC->hasSuperClassEq(TRI.getBoolRC());
92 }
93
94 const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
95 return RB->getID() == AMDGPU::VCCRegBankID;
96}

97
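// Rewrite a copy-like intrinsic (wqm, softwqm, strict.wwm, strict.wqm) to the
// target pseudo NewOpc: the intrinsic ID operand is dropped and the source and
// destination must resolve to the same register class.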

98bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,

99 unsigned NewOpc) const {

100 MI.setDesc(TII.get(NewOpc));

101 MI.removeOperand(1);

103

106

107

109 return false;

110

115 if (!DstRC || DstRC != SrcRC)

116 return false;

117

120}

121

122bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

125 I.setDesc(TII.get(TargetOpcode::COPY));

126

129 Register DstReg = Dst.getReg();

130 Register SrcReg = Src.getReg();

131

132 if (isVCC(DstReg, *MRI)) {

133 if (SrcReg == AMDGPU::SCC) {

136 if (!RC)

137 return true;

139 }

140

141 if (!isVCC(SrcReg, *MRI)) {

142

144 return false;

145

148

149 std::optional<ValueAndVReg> ConstVal =

151 if (ConstVal) {

152 unsigned MovOpc =

153 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

154 BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)

155 .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

156 } else {

157 Register MaskedReg = MRI->createVirtualRegister(SrcRC);

158

159

160

161

162

165 const int64_t NoMods = 0;

166 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

172 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

178 } else {

180 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

181 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)

184 if (IsSGPR)

185 And.setOperandDead(3);

186

187 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

190 }

191 }

192

193 if (MRI->getRegClassOrNull(SrcReg))

194 MRI->setRegClass(SrcReg, SrcRC);

195 I.eraseFromParent();

196 return true;

197 }

198

202 return false;

203

204 return true;

205 }

206

208 if (MO.getReg().isPhysical())

209 continue;

210

213 if (!RC)

214 continue;

216 }

217 return true;

218}

219
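// Select G_PHI to a target PHI, choosing the destination register class from
// the existing class or from the register bank assigned to the def.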

220bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {

221 const Register DefReg = I.getOperand(0).getReg();

222 const LLT DefTy = MRI->getType(DefReg);

223

224

225

226

227

229 return false;

230

231

232

234 MRI->getRegClassOrRegBank(DefReg);

235

237 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);

238 if (!DefRC) {

240 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");

241 return false;

242 }

243

244 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);

246 if (!DefRC) {

247 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");

248 return false;

249 }

250 }

251

252

253 I.setDesc(TII.get(TargetOpcode::PHI));

255}

256

258AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,

260 unsigned SubIdx) const {

261

264 Register DstReg = MRI->createVirtualRegister(&SubRC);

265

266 if (MO.isReg()) {

267 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

269 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

270 .addReg(Reg, 0, ComposedSubIdx);

271

276 }

277

279

281

282 switch (SubIdx) {

283 default:

284 llvm_unreachable("do not know to split immediate with this sub index.");

285 case AMDGPU::sub0:

287 case AMDGPU::sub1:

289 }

290}

291

292static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
293 switch (Opc) {
294 case AMDGPU::G_AND:
295 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
296 case AMDGPU::G_OR:
297 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
298 case AMDGPU::G_XOR:
299 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
300 default:
301 llvm_unreachable("not a bit op");
302 }
303}

304
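// Scalar bitwise ops: only the SGPR and VCC banks are handled here; the result
// width (and wave64 lane masks) selects between the 32- and 64-bit
// S_AND/S_OR/S_XOR forms returned by getLogicalBitOpcode.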

305bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {

306 Register DstReg = I.getOperand(0).getReg();

308

310 if (DstRB->getID() != AMDGPU::SGPRRegBankID &&

311 DstRB->getID() != AMDGPU::VCCRegBankID)

312 return false;

313

314 bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&

317

318

320 true,

321 false,

322 true));

324}

325

326bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

329 Register DstReg = I.getOperand(0).getReg();

331 LLT Ty = MRI->getType(DstReg);

333 return false;

334

337 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;

338 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

339

340 if (Size == 32) {

341 if (IsSALU) {

342 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

344 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)

345 .add(I.getOperand(1))

346 .add(I.getOperand(2))

348 I.eraseFromParent();

350 }

351

353 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;

354 I.setDesc(TII.get(Opc));

358 }

359

360 const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

361

364 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)

366 .add(I.getOperand(1))

367 .add(I.getOperand(2))

369 I.eraseFromParent();

371 }

372

373 assert(!Sub && "illegal sub should not reach here");

374

376 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;

378 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

379

380 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));

381 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));

382 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));

383 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

384

385 Register DstLo = MRI->createVirtualRegister(&HalfRC);

386 Register DstHi = MRI->createVirtualRegister(&HalfRC);

387

388 if (IsSALU) {

389 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

390 .add(Lo1)

391 .add(Lo2);

392 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

393 .add(Hi1)

394 .add(Hi2)

396 } else {

398 Register CarryReg = MRI->createVirtualRegister(CarryRC);

399 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

401 .add(Lo1)

402 .add(Lo2)

406 .add(Hi1)

407 .add(Hi2)

410

412 return false;

413 }

414

415 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

417 .addImm(AMDGPU::sub0)

419 .addImm(AMDGPU::sub1);

420

421

423 return false;

424

425 I.eraseFromParent();

426 return true;

427}

428
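// G_UADDO/G_USUBO/G_UADDE/G_USUBE: a VCC-banked carry uses the VALU
// V_ADD_CO/V_ADDC family directly; otherwise the carry is routed through SCC
// with S_ADD/S_ADDC and copied out if the carry result is used.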

429bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

434 Register Dst0Reg = I.getOperand(0).getReg();

435 Register Dst1Reg = I.getOperand(1).getReg();

436 const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||

437 I.getOpcode() == AMDGPU::G_UADDE;

438 const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||

439 I.getOpcode() == AMDGPU::G_USUBE;

440

441 if (isVCC(Dst1Reg, *MRI)) {

442 unsigned NoCarryOpc =

443 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

444 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;

445 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

449 }

450

451 Register Src0Reg = I.getOperand(2).getReg();

452 Register Src1Reg = I.getOperand(3).getReg();

453

454 if (HasCarryIn) {

455 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

456 .addReg(I.getOperand(4).getReg());

457 }

458

459 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;

460 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

461

462 auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)

463 .add(I.getOperand(2))

464 .add(I.getOperand(3));

465

466 if (MRI->use_nodbg_empty(Dst1Reg)) {

468 } else {

469 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

470 .addReg(AMDGPU::SCC);

471 if (MRI->getRegClassOrNull(Dst1Reg))

472 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

473 }

474

478 return false;

479

480 if (HasCarryIn &&

482 AMDGPU::SReg_32RegClass, *MRI))

483 return false;

484

485 I.eraseFromParent();

486 return true;

487}

488

489bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

493 const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

494

495 unsigned Opc;

497 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64

498 : AMDGPU::V_MAD_I64_I32_gfx11_e64;

499 else

500 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

501 I.setDesc(TII.get(Opc));

503 I.addImplicitDefUseOperands(*MF);

505}

506

507

508bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

510 Register DstReg = I.getOperand(0).getReg();

511 Register SrcReg = I.getOperand(1).getReg();

512 LLT DstTy = MRI->getType(DstReg);

513 LLT SrcTy = MRI->getType(SrcReg);

516

517

518 unsigned Offset = I.getOperand(2).getImm();

519 if (Offset % 32 != 0 || DstSize > 128)

520 return false;

521

522

523

524 if (DstSize == 16)

525 DstSize = 32;

526

530 return false;

531

535 if (!SrcRC)

536 return false;

538 DstSize / 32);

539 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

540 if (!SrcRC)

541 return false;

542

544 *SrcRC, I.getOperand(1));

546 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

548

549 I.eraseFromParent();

550 return true;

551}

552

553bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {

555 Register DstReg = MI.getOperand(0).getReg();

556 LLT DstTy = MRI->getType(DstReg);

557 LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

558

560 if (SrcSize < 32)

562

568 if (!DstRC)

569 return false;

570

573 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);

574 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {

578

582 return false;

583 }

584

586 return false;

587

588 MI.eraseFromParent();

589 return true;

590}

591
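// G_UNMERGE_VALUES: split the source by emitting one subregister copy per
// result register.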

592bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {

594 const int NumDst = MI.getNumOperands() - 1;

595

597

598 Register SrcReg = Src.getReg();

599 Register DstReg0 = MI.getOperand(0).getReg();

600 LLT DstTy = MRI->getType(DstReg0);

601 LLT SrcTy = MRI->getType(SrcReg);

602

607

611 return false;

612

613

614

615

617 for (int I = 0, E = NumDst; I != E; ++I) {

619 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())

620 .addReg(SrcReg, 0, SubRegs[I]);

621

622

623 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);

625 return false;

626

630 return false;

631 }

632

633 MI.eraseFromParent();

634 return true;

635}

636

637bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {

638 assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||

639 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

640

641 Register Src0 = MI.getOperand(1).getReg();

642 Register Src1 = MI.getOperand(2).getReg();

643 LLT SrcTy = MRI->getType(Src0);

645

646

647 if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {

648 return selectG_MERGE_VALUES(MI);

649 }

650

651

652

653 Register Dst = MI.getOperand(0).getReg();

655 (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

658

660 if (DstBank->getID() == AMDGPU::AGPRRegBankID)

661 return false;

662

663 assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||

664 DstBank->getID() == AMDGPU::VGPRRegBankID);

665 const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

666

669

670

671

672

674 if (ConstSrc1) {

675 auto ConstSrc0 =

677 if (ConstSrc0) {

678 const int64_t K0 = ConstSrc0->Value.getSExtValue();

679 const int64_t K1 = ConstSrc1->Value.getSExtValue();

683

684

685 if (IsVector) {

686 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm);

687 MI.eraseFromParent();

689 }

690

691

692 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm);

693 MI.eraseFromParent();

695 }

696 }

697

698

700 return true;

701

702

703

705 if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {

706 MI.setDesc(TII.get(AMDGPU::COPY));

707 MI.removeOperand(2);

708 const auto &RC =

709 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

712 }

713

714

715 if (IsVector) {

716 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

717 auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

721 return false;

722

723 MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

728 return false;

729

730 MI.eraseFromParent();

731 return true;

732 }

733

736

737

738

739

740

741

742

743

744

745

746

747

748

751

754

755 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;

756 if (Shift0 && Shift1) {

757 Opc = AMDGPU::S_PACK_HH_B32_B16;

758 MI.getOperand(1).setReg(ShiftSrc0);

759 MI.getOperand(2).setReg(ShiftSrc1);

760 } else if (Shift1) {

761 Opc = AMDGPU::S_PACK_LH_B32_B16;

762 MI.getOperand(2).setReg(ShiftSrc1);

763 } else if (Shift0) {

764 auto ConstSrc1 =

766 if (ConstSrc1 && ConstSrc1->Value == 0) {

767

768 auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

772

773 MI.eraseFromParent();

775 }

777 Opc = AMDGPU::S_PACK_HL_B32_B16;

778 MI.getOperand(1).setReg(ShiftSrc0);

779 }

780 }

781

782 MI.setDesc(TII.get(Opc));

784}

785

786bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {

788

789

790

792 if ((!RC && MRI->getRegBankOrNull(MO.getReg())) ||

794 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

795 return true;

796 }

797

798 return false;

799}

800

801bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

803

804 Register DstReg = I.getOperand(0).getReg();

805 Register Src0Reg = I.getOperand(1).getReg();

806 Register Src1Reg = I.getOperand(2).getReg();

807 LLT Src1Ty = MRI->getType(Src1Reg);

808

809 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

811

812 int64_t Offset = I.getOperand(3).getImm();

813

814

815 if (Offset % 32 != 0 || InsSize % 32 != 0)

816 return false;

817

818

819 if (InsSize > 128)

820 return false;

821

823 if (SubReg == AMDGPU::NoSubRegister)

824 return false;

825

829 if (!DstRC)

830 return false;

831

838

839

840

841 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);

842 if (!Src0RC || !Src1RC)

843 return false;

844

848 return false;

849

851 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)

855

856 I.eraseFromParent();

857 return true;

858}

859

860bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

861 Register DstReg = MI.getOperand(0).getReg();

862 Register SrcReg = MI.getOperand(1).getReg();

863 Register OffsetReg = MI.getOperand(2).getReg();

864 Register WidthReg = MI.getOperand(3).getReg();

865

867 "scalar BFX instructions are expanded in regbankselect");

868 assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&

869 "64-bit vector BFX instructions are expanded in regbankselect");

870

873

874 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;

875 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

876 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg)

880 MI.eraseFromParent();

882}

883

884bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {

887

888 Register Dst = MI.getOperand(0).getReg();

889 Register Src0 = MI.getOperand(2).getReg();

890 Register M0Val = MI.getOperand(6).getReg();

894 return false;

895

896

897

898

899

900

901

902

903 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

906

907 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

909 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)

911 .addImm(MI.getOperand(4).getImm())

912 .addImm(MI.getOperand(3).getImm());

913

914 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst)

915 .addImm(0)

916 .addReg(Src0)

917 .addImm(MI.getOperand(4).getImm())

918 .addImm(MI.getOperand(3).getImm())

919 .addImm(0)

920 .addReg(InterpMov)

921 .addImm(MI.getOperand(5).getImm())

924

925 MI.eraseFromParent();

926 return true;

927}

928

929

930

931

932

933
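// v_writelane: a constant lane selector is folded as an immediate (masked to
// the valid lane range), and a constant value operand is folded when the
// selector stays a register; otherwise the lane selector is materialized
// through M0.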

934bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

935

938

941 Register VDst = MI.getOperand(0).getReg();

942 Register Val = MI.getOperand(2).getReg();

943 Register LaneSelect = MI.getOperand(3).getReg();

944 Register VDstIn = MI.getOperand(4).getReg();

945

946 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

947

948 std::optional<ValueAndVReg> ConstSelect =

950 if (ConstSelect) {

951

952

954 MIB.addImm(ConstSelect->Value.getSExtValue() &

956 } else {

957 std::optional<ValueAndVReg> ConstVal =

959

960

961

964 MIB.addImm(ConstVal->Value.getSExtValue());

965 MIB.addReg(LaneSelect);

966 } else {

968

969

970

971

973

974 BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

976 MIB.addReg(AMDGPU::M0);

977 }

978 }

979

981

982 MI.eraseFromParent();

984}

985

986

987
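// amdgcn.div.scale: select V_DIV_SCALE_F32/F64; the immediate operand decides
// whether the numerator or the denominator is used as src0.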

988bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

989 Register Dst0 = MI.getOperand(0).getReg();

990 Register Dst1 = MI.getOperand(1).getReg();

991

992 LLT Ty = MRI->getType(Dst0);

993 unsigned Opc;

995 Opc = AMDGPU::V_DIV_SCALE_F32_e64;

997 Opc = AMDGPU::V_DIV_SCALE_F64_e64;

998 else

999 return false;

1000

1001

1002

1005

1006 Register Numer = MI.getOperand(3).getReg();

1007 Register Denom = MI.getOperand(4).getReg();

1008 unsigned ChooseDenom = MI.getOperand(5).getImm();

1009

1010 Register Src0 = ChooseDenom != 0 ? Numer : Denom;

1011

1012 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)

1014 .addImm(0)

1015 .addUse(Src0)

1016 .addImm(0)

1017 .addUse(Denom)

1018 .addImm(0)

1019 .addUse(Numer)

1020 .addImm(0)

1021 .addImm(0);

1022

1023 MI.eraseFromParent();

1025}

1026

1027bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

1028 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();

1029 switch (IntrinsicID) {

1030 case Intrinsic::amdgcn_if_break: {

1032

1033

1034

1035 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))

1036 .add(I.getOperand(0))

1037 .add(I.getOperand(2))

1038 .add(I.getOperand(3));

1039

1040 Register DstReg = I.getOperand(0).getReg();

1041 Register Src0Reg = I.getOperand(2).getReg();

1042 Register Src1Reg = I.getOperand(3).getReg();

1043

1044 I.eraseFromParent();

1045

1046 for (Register Reg : { DstReg, Src0Reg, Src1Reg })

1048

1049 return true;

1050 }

1051 case Intrinsic::amdgcn_interp_p1_f16:

1052 return selectInterpP1F16(I);

1053 case Intrinsic::amdgcn_wqm:

1054 return constrainCopyLikeIntrin(I, AMDGPU::WQM);

1055 case Intrinsic::amdgcn_softwqm:

1056 return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);

1057 case Intrinsic::amdgcn_strict_wwm:

1058 case Intrinsic::amdgcn_wwm:

1059 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);

1060 case Intrinsic::amdgcn_strict_wqm:

1061 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);

1062 case Intrinsic::amdgcn_writelane:

1063 return selectWritelane(I);

1064 case Intrinsic::amdgcn_div_scale:

1065 return selectDivScale(I);

1066 case Intrinsic::amdgcn_icmp:

1067 case Intrinsic::amdgcn_fcmp:

1069 return true;

1070 return selectIntrinsicCmp(I);

1071 case Intrinsic::amdgcn_ballot:

1072 return selectBallot(I);

1073 case Intrinsic::amdgcn_reloc_constant:

1074 return selectRelocConstant(I);

1075 case Intrinsic::amdgcn_groupstaticsize:

1076 return selectGroupStaticSize(I);

1077 case Intrinsic::returnaddress:

1078 return selectReturnAddress(I);

1079 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

1080 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:

1081 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:

1082 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:

1083 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:

1084 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:

1085 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:

1086 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:

1087 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:

1088 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:

1089 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:

1090 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:

1091 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:

1092 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:

1093 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:

1094 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:

1095 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:

1096 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:

1097 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:

1098 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:

1099 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:

1100 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:

1101 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:

1102 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:

1103 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:

1104 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:

1105 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:

1106 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:

1107 return selectSMFMACIntrin(I);

1108 case Intrinsic::amdgcn_permlane16_swap:

1109 case Intrinsic::amdgcn_permlane32_swap:

1110 return selectPermlaneSwapIntrin(I, IntrinsicID);

1111 default:

1113 }

1114}

1115

1119 return -1;

1120

1121 if (Size == 16 && !ST.has16BitInsts())

1122 return -1;

1123

1124 const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,

1125 unsigned FakeS16Opc, unsigned S32Opc,

1126 unsigned S64Opc) {

1127 if (Size == 16)

1128

1129 return ST.hasTrue16BitInsts()

1130 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

1131 : S16Opc;

1132 if (Size == 32)

1133 return S32Opc;

1134 return S64Opc;

1135 };

1136

1137 switch (P) {

1138 default:

1141 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,

1142 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,

1143 AMDGPU::V_CMP_NE_U64_e64);

1145 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,

1146 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,

1147 AMDGPU::V_CMP_EQ_U64_e64);

1149 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,

1150 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,

1151 AMDGPU::V_CMP_GT_I64_e64);

1153 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,

1154 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,

1155 AMDGPU::V_CMP_GE_I64_e64);

1157 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,

1158 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,

1159 AMDGPU::V_CMP_LT_I64_e64);

1161 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,

1162 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,

1163 AMDGPU::V_CMP_LE_I64_e64);

1165 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,

1166 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,

1167 AMDGPU::V_CMP_GT_U64_e64);

1169 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,

1170 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,

1171 AMDGPU::V_CMP_GE_U64_e64);

1173 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,

1174 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,

1175 AMDGPU::V_CMP_LT_U64_e64);

1177 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,

1178 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,

1179 AMDGPU::V_CMP_LE_U64_e64);

1180

1182 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,

1183 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,

1184 AMDGPU::V_CMP_EQ_F64_e64);

1186 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,

1187 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,

1188 AMDGPU::V_CMP_GT_F64_e64);

1190 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,

1191 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,

1192 AMDGPU::V_CMP_GE_F64_e64);

1194 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,

1195 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,

1196 AMDGPU::V_CMP_LT_F64_e64);

1198 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,

1199 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,

1200 AMDGPU::V_CMP_LE_F64_e64);

1202 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,

1203 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,

1204 AMDGPU::V_CMP_NEQ_F64_e64);

1206 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,

1207 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,

1208 AMDGPU::V_CMP_O_F64_e64);

1210 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,

1211 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,

1212 AMDGPU::V_CMP_U_F64_e64);

1214 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,

1215 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,

1216 AMDGPU::V_CMP_NLG_F64_e64);

1218 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,

1219 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,

1220 AMDGPU::V_CMP_NLE_F64_e64);

1222 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,

1223 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,

1224 AMDGPU::V_CMP_NLT_F64_e64);

1226 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,

1227 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,

1228 AMDGPU::V_CMP_NGE_F64_e64);

1230 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,

1231 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,

1232 AMDGPU::V_CMP_NGT_F64_e64);

1234 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,

1235 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,

1236 AMDGPU::V_CMP_NEQ_F64_e64);

1238 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,

1239 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,

1240 AMDGPU::V_CMP_TRU_F64_e64);

1242 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,

1243 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,

1244 AMDGPU::V_CMP_F_F64_e64);

1245 }

1246}

1247

1249 unsigned Size) const {

1250 if (Size == 64) {

1251 if (!STI.hasScalarCompareEq64())
1252 return -1;

1253

1254 switch (P) {

1256 return AMDGPU::S_CMP_LG_U64;

1258 return AMDGPU::S_CMP_EQ_U64;

1259 default:

1260 return -1;

1261 }

1262 }

1263

1264 if (Size == 32) {

1265 switch (P) {

1267 return AMDGPU::S_CMP_LG_U32;

1269 return AMDGPU::S_CMP_EQ_U32;

1271 return AMDGPU::S_CMP_GT_I32;

1273 return AMDGPU::S_CMP_GE_I32;

1275 return AMDGPU::S_CMP_LT_I32;

1277 return AMDGPU::S_CMP_LE_I32;

1279 return AMDGPU::S_CMP_GT_U32;

1281 return AMDGPU::S_CMP_GE_U32;

1283 return AMDGPU::S_CMP_LT_U32;

1285 return AMDGPU::S_CMP_LE_U32;

1287 return AMDGPU::S_CMP_EQ_F32;

1289 return AMDGPU::S_CMP_GT_F32;

1291 return AMDGPU::S_CMP_GE_F32;

1293 return AMDGPU::S_CMP_LT_F32;

1295 return AMDGPU::S_CMP_LE_F32;

1297 return AMDGPU::S_CMP_LG_F32;

1299 return AMDGPU::S_CMP_O_F32;

1301 return AMDGPU::S_CMP_U_F32;

1303 return AMDGPU::S_CMP_NLG_F32;

1305 return AMDGPU::S_CMP_NLE_F32;

1307 return AMDGPU::S_CMP_NLT_F32;

1309 return AMDGPU::S_CMP_NGE_F32;

1311 return AMDGPU::S_CMP_NGT_F32;

1313 return AMDGPU::S_CMP_NEQ_F32;

1314 default:

1316 }

1317 }

1318

1319 if (Size == 16) {

1320 if (!STI.hasSALUFloatInsts())
1321 return -1;

1322

1323 switch (P) {

1325 return AMDGPU::S_CMP_EQ_F16;

1327 return AMDGPU::S_CMP_GT_F16;

1329 return AMDGPU::S_CMP_GE_F16;

1331 return AMDGPU::S_CMP_LT_F16;

1333 return AMDGPU::S_CMP_LE_F16;

1335 return AMDGPU::S_CMP_LG_F16;

1337 return AMDGPU::S_CMP_O_F16;

1339 return AMDGPU::S_CMP_U_F16;

1341 return AMDGPU::S_CMP_NLG_F16;

1343 return AMDGPU::S_CMP_NLE_F16;

1345 return AMDGPU::S_CMP_NLT_F16;

1347 return AMDGPU::S_CMP_NGE_F16;

1349 return AMDGPU::S_CMP_NGT_F16;

1351 return AMDGPU::S_CMP_NEQ_F16;

1352 default:

1354 }

1355 }

1356

1357 return -1;

1358}

1359

1360bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

1361

1364

1365 Register SrcReg = I.getOperand(2).getReg();

1367

1369

1370 Register CCReg = I.getOperand(0).getReg();

1371 if (!isVCC(CCReg, *MRI)) {

1372 int Opcode = getS_CMPOpcode(Pred, Size);

1373 if (Opcode == -1)

1374 return false;

1376 .add(I.getOperand(2))

1377 .add(I.getOperand(3));

1378 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

1379 .addReg(AMDGPU::SCC);

1380 bool Ret =

1383 I.eraseFromParent();

1384 return Ret;

1385 }

1386

1387 if (I.getOpcode() == AMDGPU::G_FCMP)

1388 return false;

1389

1391 if (Opcode == -1)

1392 return false;

1393

1395 I.getOperand(0).getReg())

1396 .add(I.getOperand(2))

1397 .add(I.getOperand(3));

1401 I.eraseFromParent();

1402 return Ret;

1403}

1404

1405bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {

1406 Register Dst = I.getOperand(0).getReg();

1407 if (isVCC(Dst, *MRI))

1408 return false;

1409

1410 LLT DstTy = MRI->getType(Dst);

1412 return false;

1413

1416 Register SrcReg = I.getOperand(2).getReg();

1418

1419

1420 if (Size == 1)

1421 return false;

1422

1425 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);

1426 I.eraseFromParent();

1428 }

1429

1431 if (Opcode == -1)

1432 return false;

1433

1437 auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());

1438 auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

1440 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

1442 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);

1443 SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

1445 SelectedMI.addImm(Src0Mods);

1446 SelectedMI.addReg(Src0Reg);

1448 SelectedMI.addImm(Src1Mods);

1449 SelectedMI.addReg(Src1Reg);

1451 SelectedMI.addImm(0);

1453 SelectedMI.addImm(0);

1454

1457 return false;

1458

1459 I.eraseFromParent();

1460 return true;

1461}

1462

1463

1464

1465

1466

1470 if (MI->getParent() != MBB)

1471 return false;

1472

1473

1474 if (MI->getOpcode() == AMDGPU::COPY) {

1475 auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());

1476 auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());

1477 if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&

1478 SrcRB->getID() == AMDGPU::SGPRRegBankID)

1479 return true;

1480 }

1481

1482

1483 if (isa<GAnyCmp>(MI))

1484 return true;

1485

1487

1491

1492 return false;

1493}

1494
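// amdgcn.ballot: fold constant inputs, otherwise AND the lane mask with exec
// (or copy it directly when it already respects exec); a wave32 ballot
// returned as i64 has its high 32 bits set to zero via a REG_SEQUENCE.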

1495bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

1498 Register DstReg = I.getOperand(0).getReg();

1499 Register SrcReg = I.getOperand(2).getReg();

1500 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();

1502

1503

1504

1505 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

1506 return false;

1507

1508 std::optional<ValueAndVReg> Arg =

1510

1512

1513 if (BallotSize != WaveSize) {

1514 Dst = MRI->createVirtualRegister(TRI.getBoolRC());

1515 }

1516

1517 if (Arg) {

1518 const int64_t Value = Arg->Value.getZExtValue();

1519 if (Value == 0) {

1520

1521 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

1523 } else {

1524

1527 }

1529 return false;

1530 } else {

1532

1533 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst).addReg(SrcReg);

1535 return false;

1536 } else {

1537

1538 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

1539 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), Dst)

1544 return false;

1545 }

1546 }

1547

1548

1549 if (BallotSize != WaveSize) {

1550 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

1551 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);

1552 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

1554 .addImm(AMDGPU::sub0)

1556 .addImm(AMDGPU::sub1);

1557 }

1558

1559 I.eraseFromParent();

1560 return true;

1561}

1562

1563bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {

1564 Register DstReg = I.getOperand(0).getReg();

1568 return false;

1569

1570 const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

1571

1574 auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
1575 auto *RelocSymbol = cast<GlobalVariable>(

1576 M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));

1577

1579 BuildMI(*BB, &I, I.getDebugLoc(),

1580 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

1582

1583 I.eraseFromParent();

1584 return true;

1585}

1586

1587bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

1589

1590 Register DstReg = I.getOperand(0).getReg();

1592 unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?

1593 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

1594

1597

1598 auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);

1599

1603 } else {

1608 }

1609

1610 I.eraseFromParent();

1612}

1613

1614bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

1618

1620 Register DstReg = Dst.getReg();

1621 unsigned Depth = I.getOperand(2).getImm();

1622

1625 if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||

1627 return false;

1628

1629

1630 if (Depth != 0 ||

1632 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)

1634 I.eraseFromParent();

1635 return true;

1636 }

1637

1639

1641

1642

1645 AMDGPU::SReg_64RegClass, DL);

1646 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)

1648 I.eraseFromParent();

1649 return true;

1650}

1651

1652bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

1653

1654

1656 BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))

1657 .add(MI.getOperand(1));

1658

1660 MI.eraseFromParent();

1661

1662 if (MRI->getRegClassOrNull(Reg))

1664 return true;

1665}

1666
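// ds_ordered_count: pack the index, wave_release/wave_done and dword count
// into the instruction's offset field and route the GDS base through M0.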

1667bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

1672

1673 unsigned IndexOperand = MI.getOperand(7).getImm();

1674 bool WaveRelease = MI.getOperand(8).getImm() != 0;

1675 bool WaveDone = MI.getOperand(9).getImm() != 0;

1676

1677 if (WaveDone && !WaveRelease)

1678 report_fatal_error("ds_ordered_count: wave_done requires wave_release");

1679

1680 unsigned OrderedCountIndex = IndexOperand & 0x3f;

1681 IndexOperand &= ~0x3f;

1682 unsigned CountDw = 0;

1683

1685 CountDw = (IndexOperand >> 24) & 0xf;

1686 IndexOperand &= ~(0xf << 24);

1687

1688 if (CountDw < 1 || CountDw > 4) {

1690 "ds_ordered_count: dword count must be between 1 and 4");

1691 }

1692 }

1693

1694 if (IndexOperand)

1696

1697 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

1699

1700 unsigned Offset0 = OrderedCountIndex << 2;

1701 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

1702

1704 Offset1 |= (CountDw - 1) << 6;

1705

1707 Offset1 |= ShaderType << 2;

1708

1709 unsigned Offset = Offset0 | (Offset1 << 8);

1710

1711 Register M0Val = MI.getOperand(2).getReg();

1712 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1714

1715 Register DstReg = MI.getOperand(0).getReg();

1716 Register ValReg = MI.getOperand(3).getReg();

1718 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

1722

1724 return false;

1725

1727 MI.eraseFromParent();

1728 return Ret;

1729}

1730

1732 switch (IntrID) {

1733 case Intrinsic::amdgcn_ds_gws_init:

1734 return AMDGPU::DS_GWS_INIT;

1735 case Intrinsic::amdgcn_ds_gws_barrier:

1736 return AMDGPU::DS_GWS_BARRIER;

1737 case Intrinsic::amdgcn_ds_gws_sema_v:

1738 return AMDGPU::DS_GWS_SEMA_V;

1739 case Intrinsic::amdgcn_ds_gws_sema_br:

1740 return AMDGPU::DS_GWS_SEMA_BR;

1741 case Intrinsic::amdgcn_ds_gws_sema_p:

1742 return AMDGPU::DS_GWS_SEMA_P;

1743 case Intrinsic::amdgcn_ds_gws_sema_release_all:

1744 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

1745 default:

1747 }

1748}

1749
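// GWS intrinsics (ds_gws_*): the offset must be uniform (SGPR bank). Constant
// offsets are written straight to M0; other bases are shifted into position
// and copied to M0, with any residual immediate carried on the instruction.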

1750bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

1752 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

1754 return false;

1755

1756

1757 const bool HasVSrc = MI.getNumOperands() == 3;

1758 assert(HasVSrc || MI.getNumOperands() == 2);

1759

1760 Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();

1762 if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

1763 return false;

1764

1766 unsigned ImmOffset;

1767

1770

1772

1773

1774

1775

1776 if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {

1777 Readfirstlane = OffsetDef;

1780 }

1781

1782 if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

1783

1784

1785

1786

1787

1789 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

1791 } else {

1792 std::tie(BaseOffset, ImmOffset) =

1794

1795 if (Readfirstlane) {

1796

1797

1799 return false;

1800

1803 } else {

1805 AMDGPU::SReg_32RegClass, *MRI))

1806 return false;

1807 }

1808

1809 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

1810 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)

1814

1815 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1817 }

1818

1819

1820

1821

1823

1824 if (HasVSrc) {

1825 Register VSrc = MI.getOperand(1).getReg();

1827

1829 return false;

1830 }

1831

1832 MIB.addImm(ImmOffset)

1834

1836

1837 MI.eraseFromParent();

1838 return true;

1839}

1840

1841bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,

1842 bool IsAppend) const {

1843 Register PtrBase = MI.getOperand(2).getReg();

1844 LLT PtrTy = MRI->getType(PtrBase);

1846

1848 std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

1849

1850

1851 if (!isDSOffsetLegal(PtrBase, Offset)) {

1852 PtrBase = MI.getOperand(2).getReg();

1854 }

1855

1858 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

1859

1860 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

1863 return false;

1864

1865 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())

1867 .addImm(IsGDS ? -1 : 0)

1869 MI.eraseFromParent();

1871}

1872

1873bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {

1876

1879}

1880

1881bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {

1882 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();

1886

1887

1888 if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||

1889 IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {

1892 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));

1893 }

1894 MI.eraseFromParent();

1895 return true;

1896 }

1897 }

1898

1899 if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {

1900

1903 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))

1905 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))

1907 MI.eraseFromParent();

1908 return true;

1909 }

1910

1912}

1913

1914static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
1915 bool &IsTexFail) {

1916 if (TexFailCtrl)

1917 IsTexFail = true;

1918

1919 TFE = (TexFailCtrl & 0x1) ? true : false;

1920 TexFailCtrl &= ~(uint64_t)0x1;

1921 LWE = (TexFailCtrl & 0x2) ? true : false;

1922 TexFailCtrl &= ~(uint64_t)0x2;

1923

1924 return TexFailCtrl == 0;

1925}

1926
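// Image intrinsics: derive the dmask, data/address dword counts and TFE/LWE
// flags from the operands, then pick a MIMG opcode variant (NSA vs. default
// encoding) appropriate for the target generation.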

1927bool AMDGPUInstructionSelector::selectImageIntrinsic(

1931

1934

1936 unsigned IntrOpcode = Intr->BaseOpcode;

1940

1941 const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

1942

1944 LLT VDataTy;

1945 int NumVDataDwords = -1;

1946 bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||

1947 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

1948

1949 bool Unorm;

1950 if (!BaseOpcode->Sampler)

1951 Unorm = true;

1952 else

1953 Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

1954

1955 bool TFE;

1956 bool LWE;

1957 bool IsTexFail = false;

1958 if (parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),

1959 TFE, LWE, IsTexFail))

1960 return false;

1961

1962 const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();

1963 const bool IsA16 = (Flags & 1) != 0;

1964 const bool IsG16 = (Flags & 2) != 0;

1965

1966

1967 if (IsA16 && !STI.hasG16() && !IsG16)

1968 return false;

1969

1970 unsigned DMask = 0;

1971 unsigned DMaskLanes = 0;

1972

1973 if (BaseOpcode->Atomic) {

1974 VDataOut = MI.getOperand(0).getReg();

1975 VDataIn = MI.getOperand(2).getReg();

1976 LLT Ty = MRI->getType(VDataIn);

1977

1978

1979 const bool Is64Bit = BaseOpcode->AtomicX2 ?

1982

1984 assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

1985

1986 DMask = Is64Bit ? 0xf : 0x3;

1987 NumVDataDwords = Is64Bit ? 4 : 2;

1988 } else {

1989 DMask = Is64Bit ? 0x3 : 0x1;

1990 NumVDataDwords = Is64Bit ? 2 : 1;

1991 }

1992 } else {

1993 DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

1995

1996 if (BaseOpcode->Store) {

1997 VDataIn = MI.getOperand(1).getReg();

1998 VDataTy = MRI->getType(VDataIn);

1999 NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;

2000 } else if (BaseOpcode->NoReturn) {

2001 NumVDataDwords = 0;

2002 } else {

2003 VDataOut = MI.getOperand(0).getReg();

2004 VDataTy = MRI->getType(VDataOut);

2005 NumVDataDwords = DMaskLanes;

2006

2008 NumVDataDwords = (DMaskLanes + 1) / 2;

2009 }

2010 }

2011

2012

2013 if (Subtarget->hasG16() && IsG16) {

2016 assert(G16MappingInfo);

2017 IntrOpcode = G16MappingInfo->G16;

2018 }

2019

2020

2021 assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

2022

2023 unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();

2024 if (BaseOpcode->Atomic)

2028 return false;

2029

2030 int NumVAddrRegs = 0;

2031 int NumVAddrDwords = 0;

2032 for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {

2033

2035 if (!AddrOp.isReg())

2036 continue;

2037

2040 break;

2041

2042 ++NumVAddrRegs;

2043 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

2044 }

2045

2046

2047

2048

2049 const bool UseNSA =

2050 NumVAddrRegs != 1 &&

2052 : NumVAddrDwords == NumVAddrRegs);

2053 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

2054 LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");

2055 return false;

2056 }

2057

2058 if (IsTexFail)

2059 ++NumVDataDwords;

2060

2061 int Opcode = -1;

2062 if (IsGFX12Plus) {

2064 NumVDataDwords, NumVAddrDwords);

2065 } else if (IsGFX11Plus) {

2067 UseNSA ? AMDGPU::MIMGEncGfx11NSA

2068 : AMDGPU::MIMGEncGfx11Default,

2069 NumVDataDwords, NumVAddrDwords);

2070 } else if (IsGFX10Plus) {

2072 UseNSA ? AMDGPU::MIMGEncGfx10NSA

2073 : AMDGPU::MIMGEncGfx10Default,

2074 NumVDataDwords, NumVAddrDwords);

2075 } else {

2078 NumVDataDwords, NumVAddrDwords);

2079 if (Opcode == -1) {

2082 << "requested image instruction is not supported on this GPU\n");

2083 return false;

2084 }

2085 }

2086 if (Opcode == -1 &&

2089 NumVDataDwords, NumVAddrDwords);

2090 if (Opcode == -1)

2092 NumVDataDwords, NumVAddrDwords);

2093 }

2094 if (Opcode == -1)

2095 return false;

2096

2099

2100 if (VDataOut) {

2102 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

2103

2104 Register TmpReg = MRI->createVirtualRegister(

2105 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);

2106 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

2107

2109 if (MRI->use_empty(VDataOut)) {

2110 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)

2112 }

2113

2114 } else {

2115 MIB.addDef(VDataOut);

2116 }

2117 }

2118

2119 if (VDataIn)

2120 MIB.addReg(VDataIn);

2121

2122 for (int I = 0; I != NumVAddrRegs; ++I) {

2124 if (SrcOp.isReg()) {

2127 }

2128 }

2129

2130 MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());

2131 if (BaseOpcode->Sampler)

2132 MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

2133

2134 MIB.addImm(DMask);

2135

2136 if (IsGFX10Plus)

2140

2142 MIB.addImm(IsA16 &&

2143 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

2144 if (IsGFX10Plus)

2145 MIB.addImm(IsA16 ? -1 : 0);

2146

2148 MIB.addImm(TFE);

2149 } else if (TFE) {

2150 LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");

2151 return false;

2152 }

2153

2155 MIB.addImm(LWE);

2156 if (!IsGFX10Plus)

2157 MIB.addImm(DimInfo->DA ? -1 : 0);

2158 if (BaseOpcode->HasD16)

2159 MIB.addImm(IsD16 ? -1 : 0);

2160

2161 MI.eraseFromParent();

2164 return true;

2165}

2166

2167

2168

2169bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

2171 Register Dst0 = MI.getOperand(0).getReg();

2172 Register Dst1 = MI.getOperand(1).getReg();

2173

2176

2178 Register Data0 = MI.getOperand(4).getReg();

2179 Register Data1 = MI.getOperand(5).getReg();

2180 unsigned Offset = MI.getOperand(6).getImm();

2181

2182 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)

2189

2190 MI.eraseFromParent();

2192}

2193

2194bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

2196 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();

2197 switch (IntrinsicID) {

2198 case Intrinsic::amdgcn_end_cf:

2199 return selectEndCfIntrinsic(I);

2200 case Intrinsic::amdgcn_ds_ordered_add:

2201 case Intrinsic::amdgcn_ds_ordered_swap:

2202 return selectDSOrderedIntrinsic(I, IntrinsicID);

2203 case Intrinsic::amdgcn_ds_gws_init:

2204 case Intrinsic::amdgcn_ds_gws_barrier:

2205 case Intrinsic::amdgcn_ds_gws_sema_v:

2206 case Intrinsic::amdgcn_ds_gws_sema_br:

2207 case Intrinsic::amdgcn_ds_gws_sema_p:

2208 case Intrinsic::amdgcn_ds_gws_sema_release_all:

2209 return selectDSGWSIntrinsic(I, IntrinsicID);

2210 case Intrinsic::amdgcn_ds_append:

2211 return selectDSAppendConsume(I, true);

2212 case Intrinsic::amdgcn_ds_consume:

2213 return selectDSAppendConsume(I, false);

2214 case Intrinsic::amdgcn_init_whole_wave:

2215 return selectInitWholeWave(I);

2216 case Intrinsic::amdgcn_s_barrier:

2217 case Intrinsic::amdgcn_s_barrier_signal:

2218 case Intrinsic::amdgcn_s_barrier_wait:

2219 return selectSBarrier(I);

2220 case Intrinsic::amdgcn_raw_buffer_load_lds:

2221 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:

2222 case Intrinsic::amdgcn_struct_buffer_load_lds:

2223 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:

2224 return selectBufferLoadLds(I);

2225 case Intrinsic::amdgcn_global_load_lds:

2226 return selectGlobalLoadLds(I);

2227 case Intrinsic::amdgcn_exp_compr:

2229 Function &F = I.getMF()->getFunction();

2231 F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);

2232 F.getContext().diagnose(NoFpRet);

2233 return false;

2234 }

2235 break;

2236 case Intrinsic::amdgcn_ds_bvh_stack_rtn:

2237 return selectDSBvhStackIntrinsic(I);

2238 case Intrinsic::amdgcn_s_barrier_init:

2239 case Intrinsic::amdgcn_s_barrier_signal_var:

2240 return selectNamedBarrierInit(I, IntrinsicID);

2241 case Intrinsic::amdgcn_s_barrier_join:

2242 case Intrinsic::amdgcn_s_get_named_barrier_state:

2243 return selectNamedBarrierInst(I, IntrinsicID);

2244 case Intrinsic::amdgcn_s_get_barrier_state:

2245 return selectSGetBarrierState(I, IntrinsicID);

2246 case Intrinsic::amdgcn_s_barrier_signal_isfirst:

2247 return selectSBarrierSignalIsfirst(I, IntrinsicID);

2248 }

2250}

2251

2252bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

2254 return true;

2255

2258

2259 Register DstReg = I.getOperand(0).getReg();

2264 if (!isVCC(CCReg, *MRI)) {

2265 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :

2266 AMDGPU::S_CSELECT_B32;

2269

2270

2271

2272

2273 if (MRI->getRegClassOrNull(CCReg))

2276 .add(I.getOperand(2))

2277 .add(I.getOperand(3));

2278

2279 bool Ret = false;

2282 I.eraseFromParent();

2283 return Ret;

2284 }

2285

2286

2287 if (Size > 32)

2288 return false;

2289

2291 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

2293 .add(I.getOperand(3))

2295 .add(I.getOperand(2))

2296 .add(I.getOperand(1));

2297

2299 I.eraseFromParent();

2300 return Ret;

2301}

2302

2303bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {

2304 Register DstReg = I.getOperand(0).getReg();

2305 Register SrcReg = I.getOperand(1).getReg();

2306 const LLT DstTy = MRI->getType(DstReg);

2307 const LLT SrcTy = MRI->getType(SrcReg);

2309

2312 if (DstTy == S1) {

2313

2314

2315 DstRB = SrcRB;

2316 } else {

2317 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

2318 if (SrcRB != DstRB)

2319 return false;

2320 }

2321

2322 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

2323

2326

2331 if (!SrcRC || !DstRC)

2332 return false;

2333

2336 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");

2337 return false;

2338 }

2339

2340 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {

2344 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)

2345 .addReg(SrcReg, 0, AMDGPU::lo16);

2346 I.eraseFromParent();

2347 return true;

2348 }

2349

2353

2354 Register LoReg = MRI->createVirtualRegister(DstRC);

2355 Register HiReg = MRI->createVirtualRegister(DstRC);

2356 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)

2357 .addReg(SrcReg, 0, AMDGPU::sub0);

2358 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)

2359 .addReg(SrcReg, 0, AMDGPU::sub1);

2360

2361 if (IsVALU && STI.hasSDWA()) {

2362

2363

2365 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

2366 .addImm(0)

2367 .addReg(HiReg)

2368 .addImm(0)

2374 } else {

2375 Register TmpReg0 = MRI->createVirtualRegister(DstRC);

2376 Register TmpReg1 = MRI->createVirtualRegister(DstRC);

2377 Register ImmReg = MRI->createVirtualRegister(DstRC);

2378 if (IsVALU) {

2379 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

2382 } else {

2383 BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)

2387 }

2388

2389 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;

2390 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;

2391 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

2392

2401

2402 if (!IsVALU) {

2403 And.setOperandDead(3);

2404 Or.setOperandDead(3);

2405 }

2406 }

2407

2408 I.eraseFromParent();

2409 return true;

2410 }

2411

2413 return false;

2414

2415 if (SrcSize > 32) {

2416 unsigned SubRegIdx =

2418 if (SubRegIdx == AMDGPU::NoSubRegister)

2419 return false;

2420

2421

2422

2424 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

2425 if (!SrcWithSubRC)

2426 return false;

2427

2428 if (SrcWithSubRC != SrcRC) {

2430 return false;

2431 }

2432

2433 I.getOperand(1).setSubReg(SubRegIdx);

2434 }

2435

2436 I.setDesc(TII.get(TargetOpcode::COPY));

2437 return true;

2438}

2439

2440

2441static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
2442 Mask = maskTrailingOnes<unsigned>(Size);

2443 int SignedMask = static_cast<int>(Mask);

2444 return SignedMask >= -16 && SignedMask <= 64;

2445}

2446

2447

2448const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

2452 if (auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))

2453 return RB;

2454

2455

2456 if (auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))

2458 return nullptr;

2459}

2460

2461bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {

2462 bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;

2463 bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

2466 const Register DstReg = I.getOperand(0).getReg();

2467 const Register SrcReg = I.getOperand(1).getReg();

2468

2469 const LLT DstTy = MRI->getType(DstReg);

2470 const LLT SrcTy = MRI->getType(SrcReg);

2471 const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

2475 return false;

2476

2477

2478 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

2479

2480

2481 if (I.getOpcode() == AMDGPU::G_ANYEXT) {

2482 if (DstSize <= 32)

2483 return selectCOPY(I);

2484

2486 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);

2489 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

2490

2491 Register UndefReg = MRI->createVirtualRegister(SrcRC);

2492 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

2493 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

2495 .addImm(AMDGPU::sub0)

2497 .addImm(AMDGPU::sub1);

2498 I.eraseFromParent();

2499

2502 }

2503

2504 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

2505

2506

2507

2508 unsigned Mask;

2511 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)

2514 I.eraseFromParent();

2516 }

2517

2518 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

2522 .addImm(0)

2523 .addImm(SrcSize);

2524 I.eraseFromParent();

2526 }

2527

2528 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {

2530 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;

2532 return false;

2533

2534 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {

2535 const unsigned SextOpc = SrcSize == 8 ?

2536 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

2539 I.eraseFromParent();

2541 }

2542

2543

2544

2545 if (DstSize > 32 && SrcSize == 32) {

2546 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2547 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

2549 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)

2553 } else {

2554 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)

2556 }

2557 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

2559 .addImm(AMDGPU::sub0)

2561 .addImm(AMDGPU::sub1);

2562 I.eraseFromParent();

2564 *MRI);

2565 }

2566

2567 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;

2568 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

2569

2570

2571 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

2572

2573 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

2574 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2575 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

2576

2577 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

2578 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)

2580 .addImm(AMDGPU::sub0)

2582 .addImm(AMDGPU::sub1);

2583

2586 .addImm(SrcSize << 16);

2587

2588 I.eraseFromParent();

2590 }

2591

2592 unsigned Mask;

2594 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)

2598 } else {

2601 .addImm(SrcSize << 16);

2602 }

2603

2604 I.eraseFromParent();

2606 }

2607

2608 return false;

2609}

2610

2613}

2614

2618 Reg = BitcastSrc;

2619 return Reg;

2620}

2621

2626 return false;

2627

2634 return true;

2635 }

2636 }

2637

2639 if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

2640 return false;

2641

2644

2646 assert(Mask.size() == 2);

2647

2648 if (Mask[0] == 1 && Mask[1] <= 1) {

2650 return true;

2651 }

2652

2653 return false;

2654}

2655

2656bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {

2658 return false;

2659

2660 Register Dst = I.getOperand(0).getReg();

2662 if (DstRB->getID() != AMDGPU::SGPRRegBankID)

2663 return false;

2664

2665 Register Src = I.getOperand(1).getReg();

2666

2671 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

2673 I.eraseFromParent();

2675 }

2676 }

2677

2678 return false;

2679}

2680

2681bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {

2682

2683

2684

2685

2686

2687

2688

2689

2690

2691

2692

2693 Register Dst = MI.getOperand(0).getReg();

2695 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

2697 return false;

2698

2699 Register Src = MI.getOperand(1).getReg();

2701 if (Fabs)

2703

2706 return false;

2707

2710 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2711 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2712 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2713 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2714

2715 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)

2716 .addReg(Src, 0, AMDGPU::sub0);

2717 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)

2718 .addReg(Src, 0, AMDGPU::sub1);

2719 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

2720 .addImm(0x80000000);

2721

2722

2723 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

2724 BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)

2728 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

2730 .addImm(AMDGPU::sub0)

2732 .addImm(AMDGPU::sub1);

2733 MI.eraseFromParent();

2734 return true;

2735}

2736

2737
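// As with the G_FNEG case above, a 64-bit scalar G_FABS is lowered on the
// SALU by splitting the source into sub0/sub1 copies, clearing the sign bit
// of the high half (S_AND_B32 with 0x7fffffff), and rebuilding the result
// with a REG_SEQUENCE.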

2738bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {

2739 Register Dst = MI.getOperand(0).getReg();

2741 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

2743 return false;

2744

2745 Register Src = MI.getOperand(1).getReg();

2748 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2749 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2750 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2751 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

2752

2755 return false;

2756

2757 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)

2758 .addReg(Src, 0, AMDGPU::sub0);

2759 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)

2760 .addReg(Src, 0, AMDGPU::sub1);

2761 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

2762 .addImm(0x7fffffff);

2763

2764

2765

2766 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

2770 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

2772 .addImm(AMDGPU::sub0)

2774 .addImm(AMDGPU::sub1);

2775

2776 MI.eraseFromParent();

2777 return true;

2778}

2779

2781 return MI.getOpcode() == TargetOpcode::G_CONSTANT;

2782}

2783

2784void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

2786

2787 unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;

2789 MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

2790

2792

2793 if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

2794 return;

2795

2796 GEPInfo GEPInfo;

2797

2798 for (unsigned i = 1; i != 3; ++i) {

2803

2804

2805 assert(GEPInfo.Imm == 0);

2807 continue;

2808 }

2810 if (OpBank->getID() == AMDGPU::SGPRRegBankID)

2811 GEPInfo.SgprParts.push_back(GEPOp.getReg());

2812 else

2813 GEPInfo.VgprParts.push_back(GEPOp.getReg());

2814 }

2815

2817 getAddrModeInfo(*PtrMI, MRI, AddrInfo);

2818}

2819

2820bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {

2821 return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

2822}

2823

2824bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {

2825 if (!MI.hasOneMemOperand())

2826 return false;

2827

2830

2831

2832

2833

2834

2835 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||

2836 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))

2837 return true;

2838

2840 return true;

2841

2842 if (MI.getOpcode() == AMDGPU::G_PREFETCH)

2843 return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==

2844 AMDGPU::SGPRRegBankID;

2845

2847 return I && I->getMetadata("amdgpu.uniform");

2848}

2849

2850bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {

2851 for (const GEPInfo &GEPInfo : AddrInfo) {

2852 if (!GEPInfo.VgprParts.empty())

2853 return true;

2854 }

2855 return false;

2856}

2857

2858void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {

2859 const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

2864

2865

2866 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

2868 }

2869}

2870

2871bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

2873 initM0(I);

2875}

2876

2878 if (Reg.isPhysical())

2879 return false;

2880

2882 const unsigned Opcode = MI.getOpcode();

2883

2884 if (Opcode == AMDGPU::COPY)

2886

2887 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||

2888 Opcode == AMDGPU::G_XOR)

2891

2892 if (auto *GI = dyn_cast<GIntrinsic>(&MI))

2893 return GI->is(Intrinsic::amdgcn_class);

2894

2895 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;

2896}

2897
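// Branch selection below: a scalar (SCC) condition feeds S_CBRANCH_SCC1
// directly, while a VCC-bank condition is first ANDed with EXEC
// (S_AND_B32/B64) so inactive lanes cannot trigger the branch, and then used
// with S_CBRANCH_VCCNZ.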

2898bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {

2903

2904 unsigned BrOpcode;

2907

2908

2909

2910

2911

2912

2913 if (!isVCC(CondReg, *MRI)) {

2915 return false;

2916

2917 CondPhysReg = AMDGPU::SCC;

2918 BrOpcode = AMDGPU::S_CBRANCH_SCC1;

2919 ConstrainRC = &AMDGPU::SReg_32RegClass;

2920 } else {

2921

2922

2923

2924

2926 const bool Is64 = STI.isWave64();

2927 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

2928 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

2929

2930 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());

2931 BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

2935 CondReg = TmpReg;

2936 }

2937

2938 CondPhysReg = TRI.getVCC();

2939 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;

2940 ConstrainRC = TRI.getBoolRC();

2941 }

2942

2943 if (MRI->getRegClassOrNull(CondReg))

2944 MRI->setRegClass(CondReg, ConstrainRC);

2945

2946 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)

2948 BuildMI(*BB, &I, DL, TII.get(BrOpcode))

2949 .addMBB(I.getOperand(1).getMBB());

2950

2951 I.eraseFromParent();

2952 return true;

2953}

2954

2955bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(

2957 Register DstReg = I.getOperand(0).getReg();

2959 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

2960 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

2961 if (IsVGPR)

2963

2965 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);

2966}

2967
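// G_PTRMASK is selected as a plain AND. A 64-bit scalar mask can use
// S_AND_B64 directly; otherwise the pointer is split into 32-bit halves and
// each half is either copied through unchanged (when the matching mask half
// is all ones) or ANDed with the corresponding half of the mask.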

2968bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {

2969 Register DstReg = I.getOperand(0).getReg();

2970 Register SrcReg = I.getOperand(1).getReg();

2971 Register MaskReg = I.getOperand(2).getReg();

2972 LLT Ty = MRI->getType(DstReg);

2973 LLT MaskTy = MRI->getType(MaskReg);

2976

2980 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

2981 if (DstRB != SrcRB)

2982 return false;

2983

2984

2985

2989

2990 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;

2991 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

2992

2994 !CanCopyLow32 && !CanCopyHi32) {

2995 auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)

2999 I.eraseFromParent();

3001 }

3002

3003 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;

3005 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

3006

3010 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

3011

3015 return false;

3016

3019 "ptrmask should have been narrowed during legalize");

3020

3021 auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)

3024

3025 if (!IsVGPR)

3027 I.eraseFromParent();

3028 return true;

3029 }

3030

3031 Register HiReg = MRI->createVirtualRegister(&RegRC);

3032 Register LoReg = MRI->createVirtualRegister(&RegRC);

3033

3034

3035 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)

3036 .addReg(SrcReg, 0, AMDGPU::sub0);

3037 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)

3038 .addReg(SrcReg, 0, AMDGPU::sub1);

3039

3040 Register MaskedLo, MaskedHi;

3041

3042 if (CanCopyLow32) {

3043

3044 MaskedLo = LoReg;

3045 } else {

3046

3047 Register MaskLo = MRI->createVirtualRegister(&RegRC);

3048 MaskedLo = MRI->createVirtualRegister(&RegRC);

3049

3050 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)

3051 .addReg(MaskReg, 0, AMDGPU::sub0);

3052 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

3055 }

3056

3057 if (CanCopyHi32) {

3058

3059 MaskedHi = HiReg;

3060 } else {

3061 Register MaskHi = MRI->createVirtualRegister(&RegRC);

3062 MaskedHi = MRI->createVirtualRegister(&RegRC);

3063

3064 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)

3065 .addReg(MaskReg, 0, AMDGPU::sub1);

3066 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

3069 }

3070

3071 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

3073 .addImm(AMDGPU::sub0)

3075 .addImm(AMDGPU::sub1);

3076 I.eraseFromParent();

3077 return true;

3078}

3079

3080

3081
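// Helper used by the vector-indexing selections below: a constant offset on
// the index register is folded into a static subregister index when it is in
// range; otherwise the original index register and the first subregister are
// used.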

3082static std::pair<Register, unsigned>

3088

3089 std::tie(IdxBaseReg, Offset) =

3091 if (IdxBaseReg == AMDGPU::NoRegister) {

3092

3093

3095 IdxBaseReg = IdxReg;

3096 }

3097

3099

3100

3101

3102 if (static_cast<unsigned>(Offset) >= SubRegs.size())

3103 return std::pair(IdxReg, SubRegs[0]);

3104 return std::pair(IdxBaseReg, SubRegs[Offset]);

3105}

3106

3107bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(

3109 Register DstReg = MI.getOperand(0).getReg();

3110 Register SrcReg = MI.getOperand(1).getReg();

3111 Register IdxReg = MI.getOperand(2).getReg();

3112

3113 LLT DstTy = MRI->getType(DstReg);

3114 LLT SrcTy = MRI->getType(SrcReg);

3115

3119

3120

3121

3122 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

3123 return false;

3124

3126 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);

3128 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);

3129 if (!SrcRC || !DstRC)

3130 return false;

3134 return false;

3135

3139

3142 *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB);

3143

3144 if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {

3146 return false;

3147

3148 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3150

3151 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;

3152 BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)

3155 MI.eraseFromParent();

3156 return true;

3157 }

3158

3159 if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)

3160 return false;

3161

3163 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3165 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)

3168 MI.eraseFromParent();

3169 return true;

3170 }

3171

3174 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)

3178

3179 MI.eraseFromParent();

3180 return true;

3181}

3182

3183

3184bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(

3186 Register DstReg = MI.getOperand(0).getReg();

3187 Register VecReg = MI.getOperand(1).getReg();

3188 Register ValReg = MI.getOperand(2).getReg();

3189 Register IdxReg = MI.getOperand(3).getReg();

3190

3191 LLT VecTy = MRI->getType(DstReg);

3192 LLT ValTy = MRI->getType(ValReg);

3195

3199

3201

3202

3203

3204 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

3205 return false;

3206

3208 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);

3210 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

3211

3216 return false;

3217

3218 if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

3219 return false;

3220

3222 std::tie(IdxReg, SubReg) =

3224

3225 const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&

3227

3230

3231 if (!IndexMode) {

3232 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3234

3236 VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);

3237 BuildMI(*BB, MI, DL, RegWriteOp, DstReg)

3241 MI.eraseFromParent();

3242 return true;

3243 }

3244

3247 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)

3252

3253 MI.eraseFromParent();

3254 return true;

3255}

3256

3257bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {

3259 unsigned Opc;

3260 unsigned Size = MI.getOperand(3).getImm();

3261

3262

3263 const bool HasVIndex = MI.getNumOperands() == 9;

3265 int OpOffset = 0;

3266 if (HasVIndex) {

3267 VIndex = MI.getOperand(4).getReg();

3268 OpOffset = 1;

3269 }

3270

3271 Register VOffset = MI.getOperand(4 + OpOffset).getReg();

3272 std::optional<ValueAndVReg> MaybeVOffset =

3274 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

3275

3276 switch (Size) {

3277 default:

3278 return false;

3279 case 1:

3280 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN

3281 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN

3282 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN

3283 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;

3284 break;

3285 case 2:

3286 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN

3287 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN

3288 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN

3289 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;

3290 break;

3291 case 4:

3292 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN

3293 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN

3294 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN

3295 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;

3296 break;

3297 case 12:

3299 return false;

3300

3301 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN

3302 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN

3303 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN

3304 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;

3305 break;

3306 case 16:

3308 return false;

3309

3310 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN

3311 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN

3312 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN

3313 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;

3314 break;

3315 }

3316

3319 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3320 .add(MI.getOperand(2));

3321

3323

3324 if (HasVIndex && HasVOffset) {

3325 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());

3326 BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)

3328 .addImm(AMDGPU::sub0)

3330 .addImm(AMDGPU::sub1);

3331

3333 } else if (HasVIndex) {

3335 } else if (HasVOffset) {

3336 MIB.addReg(VOffset);

3337 }

3338

3339 MIB.add(MI.getOperand(1));

3340 MIB.add(MI.getOperand(5 + OpOffset));

3341 MIB.add(MI.getOperand(6 + OpOffset));

3343 unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

3348 ? 1

3349 : 0);

3350

3353 LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();

3355 StorePtrI.V = nullptr;

3357

3362

3366

3367 MIB.setMemRefs({LoadMMO, StoreMMO});

3368

3369 MI.eraseFromParent();

3371}

3372

3373

3378

3379

3381 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

3383

3384 assert(Def->getNumOperands() == 3 &&

3385 MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));

3387 return Def->getOperand(1).getReg();

3388 }

3389

3391}

3392

3393bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {

3394 unsigned Opc;

3395 unsigned Size = MI.getOperand(3).getImm();

3396

3397 switch (Size) {

3398 default:

3399 return false;

3400 case 1:

3401 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;

3402 break;

3403 case 2:

3404 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;

3405 break;

3406 case 4:

3407 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;

3408 break;

3409 case 12:

3411 return false;

3412 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;

3413 break;

3414 case 16:

3416 return false;

3417 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;

3418 break;

3419 }

3420

3423 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

3424 .add(MI.getOperand(2));

3425

3428

3429

3430 if (!isSGPR(Addr)) {

3432 if (isSGPR(AddrDef->Reg)) {

3433 Addr = AddrDef->Reg;

3434 } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

3437 if (isSGPR(SAddr)) {

3438 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

3440 Addr = SAddr;

3441 VOffset = Off;

3442 }

3443 }

3444 }

3445 }

3446

3447 if (isSGPR(Addr)) {

3449 if (!VOffset) {

3450 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

3451 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

3453 }

3454 }

3455

3458

3459 if (isSGPR(Addr))

3460 MIB.addReg(VOffset);

3461

3462 MIB.add(MI.getOperand(4))

3463 .add(MI.getOperand(5));

3464

3467 LoadPtrI.Offset = MI.getOperand(4).getImm();

3477 sizeof(int32_t), Align(4));

3478

3479 MIB.setMemRefs({LoadMMO, StoreMMO});

3480

3481 MI.eraseFromParent();

3483}

3484

3485bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {

3486 MI.setDesc(TII.get(MI.getOperand(1).getImm()));

3487 MI.removeOperand(1);

3488 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

3489 return true;

3490}

3491

3492

3493
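// The smfmac (sparse MFMA) intrinsics below map 1:1 onto their VOP3 pseudo
// opcodes: selection rewrites the opcode, drops the intrinsic ID operand, and
// re-appends the vdst_in accumulator so it lands in the expected operand
// slot before implicit operands are added.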

3494bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {

3495 unsigned Opc;

3497 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

3498 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;

3499 break;

3500 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:

3501 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;

3502 break;

3503 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:

3504 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;

3505 break;

3506 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:

3507 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;

3508 break;

3509 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:

3510 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;

3511 break;

3512 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:

3513 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;

3514 break;

3515 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:

3516 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;

3517 break;

3518 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:

3519 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;

3520 break;

3521 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:

3522 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;

3523 break;

3524 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:

3525 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;

3526 break;

3527 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:

3528 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;

3529 break;

3530 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:

3531 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;

3532 break;

3533 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:

3534 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;

3535 break;

3536 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:

3537 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;

3538 break;

3539 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:

3540 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;

3541 break;

3542 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:

3543 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;

3544 break;

3545 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:

3546 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;

3547 break;

3548 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:

3549 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;

3550 break;

3551 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:

3552 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;

3553 break;

3554 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:

3555 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;

3556 break;

3557 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:

3558 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;

3559 break;

3560 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:

3561 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;

3562 break;

3563 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:

3564 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;

3565 break;

3566 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:

3567 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;

3568 break;

3569 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:

3570 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;

3571 break;

3572 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:

3573 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;

3574 break;

3575 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:

3576 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;

3577 break;

3578 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:

3579 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;

3580 break;

3581 default:

3583 }

3584

3585 auto VDst_In = MI.getOperand(4);

3586

3587 MI.setDesc(TII.get(Opc));

3588 MI.removeOperand(4);

3589 MI.removeOperand(1);

3590 MI.addOperand(VDst_In);

3591 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

3592 return true;

3593}

3594

3595bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(

3597 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&

3599 return false;

3600 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&

3602 return false;

3603

3604 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap

3605 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64

3606 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

3607

3608 MI.removeOperand(2);

3609 MI.setDesc(TII.get(Opcode));

3611

3614

3616}

3617

3618bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {

3619 Register DstReg = MI.getOperand(0).getReg();

3620 Register SrcReg = MI.getOperand(1).getReg();

3622 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

3625

3626 if (IsVALU) {

3627 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)

3630 } else {

3631 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)

3635 }

3636

3638 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

3640 return false;

3641

3642 MI.eraseFromParent();

3643 return true;

3644}

3645

3646

3647
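// BITOP3 matching: each of up to three distinct source registers is assigned
// the canonical truth-table mask 0xf0, 0xcc or 0xaa, and nested
// G_AND/G_OR/G_XOR nodes are folded by combining those masks, yielding the
// 8-bit table consumed by V_BITOP3_B32/B16 below.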

3651 unsigned NumOpcodes = 0;

3652 uint8_t LHSBits, RHSBits;

3653

3655

3656

3657

3658

3659

3660

3661

3662

3663

3664 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

3665

3667 Bits = 0xff;

3668 return true;

3669 }

3671 Bits = 0;

3672 return true;

3673 }

3674

3675 for (unsigned I = 0; I < Src.size(); ++I) {

3676

3677 if (Src[I] == Op) {

3678 Bits = SrcBits[I];

3679 return true;

3680 }

3681

3682 if (Src[I] == R) {

3683 Bits = SrcBits[I];

3684 Src[I] = Op;

3685 return true;

3686 }

3687 }

3688

3689 if (Src.size() == 3) {

3690

3691

3692

3696 for (unsigned I = 0; I < Src.size(); ++I) {

3697 if (Src[I] == LHS) {

3698 Bits = ~SrcBits[I];

3699 return true;

3700 }

3701 }

3702 }

3703

3704 return false;

3705 }

3706

3707 Bits = SrcBits[Src.size()];

3708 Src.push_back(Op);

3709 return true;

3710 };

3711

3713 switch (MI->getOpcode()) {

3714 case TargetOpcode::G_AND:

3715 case TargetOpcode::G_OR:

3716 case TargetOpcode::G_XOR: {

3719

3721 if (!getOperandBits(LHS, LHSBits) ||

3722 !getOperandBits(RHS, RHSBits)) {

3723 Src = Backup;

3724 return std::make_pair(0, 0);

3725 }

3726

3727

3729 if (Op.first) {

3730 NumOpcodes += Op.first;

3731 LHSBits = Op.second;

3732 }

3733

3735 if (Op.first) {

3736 NumOpcodes += Op.first;

3737 RHSBits = Op.second;

3738 }

3739 break;

3740 }

3741 default:

3742 return std::make_pair(0, 0);

3743 }

3744

3746 switch (MI->getOpcode()) {

3747 case TargetOpcode::G_AND:

3748 TTbl = LHSBits & RHSBits;

3749 break;

3750 case TargetOpcode::G_OR:

3751 TTbl = LHSBits | RHSBits;

3752 break;

3753 case TargetOpcode::G_XOR:

3754 TTbl = LHSBits ^ RHSBits;

3755 break;

3756 default:

3757 break;

3758 }

3759

3760 return std::make_pair(NumOpcodes + 1, TTbl);

3761}

3762

3763bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {

3765 return false;

3766

3767 Register DstReg = MI.getOperand(0).getReg();

3769 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

3770 if (!IsVALU)

3771 return false;

3772

3775 unsigned NumOpcodes;

3776

3777 std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

3778

3779

3780

3781 if (NumOpcodes < 2 || Src.empty())

3782 return false;

3783

3784 const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);

3785 if (NumOpcodes == 2 && IsB32) {

3786

3787

3788

3792 return false;

3793 } else if (NumOpcodes < 4) {

3794

3795

3796

3797 return false;

3798 }

3799

3800 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;

3804

3805 for (unsigned I = 0; I < Src.size(); ++I) {

3807 if (RB->getID() != AMDGPU::SGPRRegBankID)

3808 continue;

3809 if (CBL > 0) {

3810 --CBL;

3811 continue;

3812 }

3813 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

3814 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), NewReg)

3816 Src[I] = NewReg;

3817 }

3818

3819

3820

3821

3822

3823

3824 while (Src.size() < 3)

3825 Src.push_back(Src[0]);

3826

3827 auto MIB = BuildMI(*MBB, MI, DL, TII.get(Opc), DstReg);

3828 if (!IsB32)

3829 MIB.addImm(0);

3831 if (!IsB32)

3832 MIB.addImm(0);

3834 if (!IsB32)

3835 MIB.addImm(0);

3838 if (!IsB32)

3839 MIB.addImm(0);

3840

3842 MI.eraseFromParent();

3843

3844 return true;

3845}

3846

3847bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {

3848 Register SrcReg = MI.getOperand(0).getReg();

3850 return false;

3851

3858

3859 if (!WaveAddr) {

3860 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

3861 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)

3865 }

3866

3869

3870 MI.eraseFromParent();

3871 return true;

3872}

3873
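// Top-level dispatch: select() routes each generic opcode either to the
// imported per-target patterns (selectImpl) or to one of the manual handlers
// defined above.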

3875

3876 if (!I.isPreISelOpcode()) {

3877 if (I.isCopy())

3878 return selectCOPY(I);

3879 return true;

3880 }

3881

3882 switch (I.getOpcode()) {

3883 case TargetOpcode::G_AND:

3884 case TargetOpcode::G_OR:

3885 case TargetOpcode::G_XOR:

3886 if (selectBITOP3(I))

3887 return true;

3889 return true;

3890 return selectG_AND_OR_XOR(I);

3891 case TargetOpcode::G_ADD:

3892 case TargetOpcode::G_SUB:

3893 case TargetOpcode::G_PTR_ADD:

3895 return true;

3896 return selectG_ADD_SUB(I);

3897 case TargetOpcode::G_UADDO:

3898 case TargetOpcode::G_USUBO:

3899 case TargetOpcode::G_UADDE:

3900 case TargetOpcode::G_USUBE:

3901 return selectG_UADDO_USUBO_UADDE_USUBE(I);

3902 case AMDGPU::G_AMDGPU_MAD_U64_U32:

3903 case AMDGPU::G_AMDGPU_MAD_I64_I32:

3904 return selectG_AMDGPU_MAD_64_32(I);

3905 case TargetOpcode::G_INTTOPTR:

3906 case TargetOpcode::G_BITCAST:

3907 case TargetOpcode::G_PTRTOINT:

3908 case TargetOpcode::G_FREEZE:

3909 return selectCOPY(I);

3910 case TargetOpcode::G_FNEG:

3912 return true;

3913 return selectG_FNEG(I);

3914 case TargetOpcode::G_FABS:

3916 return true;

3917 return selectG_FABS(I);

3918 case TargetOpcode::G_EXTRACT:

3919 return selectG_EXTRACT(I);

3920 case TargetOpcode::G_MERGE_VALUES:

3921 case TargetOpcode::G_CONCAT_VECTORS:

3922 return selectG_MERGE_VALUES(I);

3923 case TargetOpcode::G_UNMERGE_VALUES:

3924 return selectG_UNMERGE_VALUES(I);

3925 case TargetOpcode::G_BUILD_VECTOR:

3926 case TargetOpcode::G_BUILD_VECTOR_TRUNC:

3927 return selectG_BUILD_VECTOR(I);

3928 case TargetOpcode::G_IMPLICIT_DEF:

3929 return selectG_IMPLICIT_DEF(I);

3930 case TargetOpcode::G_INSERT:

3931 return selectG_INSERT(I);

3932 case TargetOpcode::G_INTRINSIC:

3933 case TargetOpcode::G_INTRINSIC_CONVERGENT:

3934 return selectG_INTRINSIC(I);

3935 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:

3936 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:

3937 return selectG_INTRINSIC_W_SIDE_EFFECTS(I);

3938 case TargetOpcode::G_ICMP:

3939 case TargetOpcode::G_FCMP:

3940 if (selectG_ICMP_or_FCMP(I))

3941 return true;

3943 case TargetOpcode::G_LOAD:

3944 case TargetOpcode::G_ZEXTLOAD:

3945 case TargetOpcode::G_SEXTLOAD:

3946 case TargetOpcode::G_STORE:

3947 case TargetOpcode::G_ATOMIC_CMPXCHG:

3948 case TargetOpcode::G_ATOMICRMW_XCHG:

3949 case TargetOpcode::G_ATOMICRMW_ADD:

3950 case TargetOpcode::G_ATOMICRMW_SUB:

3951 case TargetOpcode::G_ATOMICRMW_AND:

3952 case TargetOpcode::G_ATOMICRMW_OR:

3953 case TargetOpcode::G_ATOMICRMW_XOR:

3954 case TargetOpcode::G_ATOMICRMW_MIN:

3955 case TargetOpcode::G_ATOMICRMW_MAX:

3956 case TargetOpcode::G_ATOMICRMW_UMIN:

3957 case TargetOpcode::G_ATOMICRMW_UMAX:

3958 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:

3959 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:

3960 case TargetOpcode::G_ATOMICRMW_FADD:

3961 case TargetOpcode::G_ATOMICRMW_FMIN:

3962 case TargetOpcode::G_ATOMICRMW_FMAX:

3963 return selectG_LOAD_STORE_ATOMICRMW(I);

3964 case TargetOpcode::G_SELECT:

3965 return selectG_SELECT(I);

3966 case TargetOpcode::G_TRUNC:

3967 return selectG_TRUNC(I);

3968 case TargetOpcode::G_SEXT:

3969 case TargetOpcode::G_ZEXT:

3970 case TargetOpcode::G_ANYEXT:

3971 case TargetOpcode::G_SEXT_INREG:

3972

3973

3974

3975 if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&

3977 return true;

3978 return selectG_SZA_EXT(I);

3979 case TargetOpcode::G_FPEXT:

3980 if (selectG_FPEXT(I))

3981 return true;

3983 case TargetOpcode::G_BRCOND:

3984 return selectG_BRCOND(I);

3985 case TargetOpcode::G_GLOBAL_VALUE:

3986 return selectG_GLOBAL_VALUE(I);

3987 case TargetOpcode::G_PTRMASK:

3988 return selectG_PTRMASK(I);

3989 case TargetOpcode::G_EXTRACT_VECTOR_ELT:

3990 return selectG_EXTRACT_VECTOR_ELT(I);

3991 case TargetOpcode::G_INSERT_VECTOR_ELT:

3992 return selectG_INSERT_VECTOR_ELT(I);

3993 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:

3994 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:

3995 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:

3996 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:

3997 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {

4000 assert(Intr && "not an image intrinsic with image pseudo");

4001 return selectImageIntrinsic(I, Intr);

4002 }

4003 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:

4004 return selectBVHIntrinsic(I);

4005 case AMDGPU::G_SBFX:

4006 case AMDGPU::G_UBFX:

4007 return selectG_SBFX_UBFX(I);

4008 case AMDGPU::G_SI_CALL:

4009 I.setDesc(TII.get(AMDGPU::SI_CALL));

4010 return true;

4011 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:

4012 return selectWaveAddress(I);

4013 case AMDGPU::G_STACKRESTORE:

4014 return selectStackRestore(I);

4015 case AMDGPU::G_PHI:

4016 return selectPHI(I);

4017 case TargetOpcode::G_CONSTANT:

4018 case TargetOpcode::G_FCONSTANT:

4019 default:

4021 }

4022 return false;

4023}

4024

4026AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

4027 return {{

4029 }};

4030

4031}

4032

4033std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(

4034 Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {

4035 unsigned Mods = 0;

4037

4038 if (MI->getOpcode() == AMDGPU::G_FNEG) {

4039 Src = MI->getOperand(1).getReg();

4042 } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {

4043

4044

4047 if (LHS && LHS->isZero()) {

4049 Src = MI->getOperand(2).getReg();

4050 }

4051 }

4052

4053 if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {

4054 Src = MI->getOperand(1).getReg();

4056 }

4057

4058 if (OpSel)

4060

4061 return std::pair(Src, Mods);

4062}

4063

4064Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(

4066 bool ForceVGPR) const {

4067 if ((Mods != 0 || ForceVGPR) &&

4068 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {

4069

4070

4071

4072

4075 TII.get(AMDGPU::COPY), VGPRSrc)

4077 Src = VGPRSrc;

4078 }

4079

4080 return Src;

4081}

4082

4083

4084

4085

4087AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

4088 return {{

4090 }};

4091}

4092

4094AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {

4096 unsigned Mods;

4097 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4098

4099 return {{

4101 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4102 },

4106 }};

4107}

4108

4110AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {

4112 unsigned Mods;

4113 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4114 true,

4115 false);

4116

4117 return {{

4119 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4120 },

4124 }};

4125}

4126

4128AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

4129 return {{

4133 }};

4134}

4135

4137AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {

4139 unsigned Mods;

4140 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4141

4142 return {{

4144 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4145 },

4147 }};

4148}

4149

4151AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(

4154 unsigned Mods;

4155 std::tie(Src, Mods) =

4156 selectVOP3ModsImpl(Root.getReg(), false);

4157

4158 return {{

4160 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4161 },

4163 }};

4164}

4165

4167AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {

4169 unsigned Mods;

4170 std::tie(Src, Mods) =

4171 selectVOP3ModsImpl(Root.getReg(), true,

4172 false);

4173

4174 return {{

4176 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

4177 },

4179 }};

4180}

4181

4183AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {

4186 if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)

4187 return {};

4188 return {{

4190 }};

4191}

4192

4193std::pair<Register, unsigned>

4194AMDGPUInstructionSelector::selectVOP3PModsImpl(

4196 unsigned Mods = 0;

4198

4199 if (MI->getOpcode() == AMDGPU::G_FNEG &&

4200

4201

4204 Src = MI->getOperand(1).getReg();

4205 MI = MRI.getVRegDef(Src);

4206 }

4207

4208

4209

4210

4211 (void)IsDOT;

4212

4213

4215

4216 return std::pair(Src, Mods);

4217}

4218

4220AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {

4223

4225 unsigned Mods;

4226 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

4227

4228 return {{

4231 }};

4232}

4233

4235AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {

4238

4240 unsigned Mods;

4241 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

4242

4243 return {{

4246 }};

4247}

4248

4250AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {

4251

4252

4253

4255 "expected i1 value");

4257 if (Root.getImm() == -1)

4259 return {{

4261 }};

4262}

4263

4265AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(

4268 "expected i1 value");

4270 if (Root.getImm() != 0)

4272

4273 return {{

4275 }};

4276}

4277

4282 switch (Elts.size()) {

4283 case 8:

4284 DstRegClass = &AMDGPU::VReg_256RegClass;

4285 break;

4286 case 4:

4287 DstRegClass = &AMDGPU::VReg_128RegClass;

4288 break;

4289 case 2:

4290 DstRegClass = &AMDGPU::VReg_64RegClass;

4291 break;

4292 default:

4294 }

4295

4297 auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)

4298 .addDef(MRI.createVirtualRegister(DstRegClass));

4299 for (unsigned i = 0; i < Elts.size(); ++i) {

4300 MIB.addReg(Elts[i]);

4302 }

4304}

4305

4310 if (ModOpcode == TargetOpcode::G_FNEG) {

4312

4314 for (auto El : Elts) {

4317 break;

4319 }

4320 if (Elts.size() != NegAbsElts.size()) {

4321

4323 } else {

4324

4327 }

4328 } else {

4329 assert(ModOpcode == TargetOpcode::G_FABS);

4330

4333 }

4334}

4335

4337AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {

4341

4342 if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {

4343 assert(BV->getNumSources() > 0);

4344

4345 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));

4346 unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)

4347 ? AMDGPU::G_FNEG

4348 : AMDGPU::G_FABS;

4349 for (unsigned i = 0; i < BV->getNumSources(); ++i) {

4350 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));

4351 if (ElF32->getOpcode() != ModOpcode)

4352 break;

4354 }

4355

4356

4357 if (BV->getNumSources() == EltsF32.size()) {

4359 *MRI);

4360 }

4361 }

4362

4365}

4366

4368AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {

4372

4373 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {

4374 for (unsigned i = 0; i < CV->getNumSources(); ++i) {

4377 break;

4379 }

4380

4381

4382 if (CV->getNumSources() == EltsV2F16.size()) {

4386 }

4387 }

4388

4391}

4392

4394AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {

4398

4399 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {

4400 assert(CV->getNumSources() > 0);

4401 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));

4402

4403 unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)

4404 ? AMDGPU::G_FNEG

4405 : AMDGPU::G_FABS;

4406

4407 for (unsigned i = 0; i < CV->getNumSources(); ++i) {

4408 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));

4409 if (ElV2F16->getOpcode() != ModOpcode)

4410 break;

4412 }

4413

4414

4415 if (CV->getNumSources() == EltsV2F16.size()) {

4418 *MRI);

4419 }

4420 }

4421

4424}

4425

4427AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {

4428 std::optional<FPValueAndVReg> FPValReg;

4432 MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

4433 }}};

4434 }

4435

4436

4437 return {};

4438 }

4439

4443 return {

4445 }

4446 }

4447

4448 return {};

4449}

4450

4452AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {

4455 unsigned Key = 0;

4456

4458 std::optional<ValueAndVReg> ShiftAmt;

4460 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&

4461 ShiftAmt->Value.getZExtValue() % 8 == 0) {

4462 Key = ShiftAmt->Value.getZExtValue() / 8;

4463 Src = ShiftSrc;

4464 }

4465

4466 return {{

4469 }};

4470}

4471

4473AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {

4474

4477 unsigned Key = 0;

4478

4480 std::optional<ValueAndVReg> ShiftAmt;

4482 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&

4483 ShiftAmt->Value.getZExtValue() == 16) {

4484 Src = ShiftSrc;

4485 Key = 1;

4486 }

4487

4488 return {{

4491 }};

4492}

4493

4495AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {

4497 unsigned Mods;

4498 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

4499

4500

4501 return {{

4504 }};

4505}

4506

4508AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {

4510 unsigned Mods;

4511 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4512 true,

4513 false,

4514 false);

4515

4516 return {{

4519 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

4520 },

4522 }};

4523}

4524

4526AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {

4528 unsigned Mods;

4529 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

4530 true,

4531 false,

4532 true);

4533

4534 return {{

4537 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

4538 },

4540 }};

4541}

4542

4543bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,

4546 int64_t *Offset) const {

4549

4550

4551

4553 getAddrModeInfo(*MI, *MRI, AddrInfo);

4554

4555 if (AddrInfo.empty())

4556 return false;

4557

4558 const GEPInfo &GEPI = AddrInfo[0];

4559 std::optional<int64_t> EncodedImm;

4560

4561 if (SOffset && Offset) {

4563 true);

4564 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&

4565 AddrInfo.size() > 1) {

4566 const GEPInfo &GEPI2 = AddrInfo[1];

4567 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {

4570 Base = GEPI2.SgprParts[0];

4571 *SOffset = OffsetReg;

4572 *Offset = *EncodedImm;

4574 return true;

4575

4576

4577

4578

4579

4581 if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

4582 return false;

4583

4584 return true;

4585 }

4586 }

4587 }

4588 return false;

4589 }

4590

4592 false);

4593 if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {

4594 Base = GEPI.SgprParts[0];

4595 *Offset = *EncodedImm;

4596 return true;

4597 }

4598

4599

4600 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&

4601 GEPI.Imm != 0) {

4602

4603

4604

4605

4606 Base = GEPI.SgprParts[0];

4607 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

4608 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

4610 return true;

4611 }

4612

4613 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {

4615 Base = GEPI.SgprParts[0];

4616 *SOffset = OffsetReg;

4617 return true;

4618 }

4619 }

4620

4621 return false;

4622}

4623

4625AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {

4628 if (!selectSmrdOffset(Root, Base, nullptr, &Offset))

4629 return std::nullopt;

4630

4633}

4634

4636AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {

4638 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

4639

4640 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)

4641 return std::nullopt;

4642

4643 const GEPInfo &GEPInfo = AddrInfo[0];

4644 Register PtrReg = GEPInfo.SgprParts[0];

4645 std::optional<int64_t> EncodedImm =

4647 if (!EncodedImm)

4648 return std::nullopt;

4649

4650 return {{

4653 }};

4654}

4655

4657AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {

4659 if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))

4660 return std::nullopt;

4661

4664}

4665

4667AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {

4670 if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))

4671 return std::nullopt;

4672

4676}

4677

4678std::pair<Register, int>

4679AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,

4680 uint64_t FlatVariant) const {

4682

4684

4687

4689 int64_t ConstOffset;

4690 std::tie(PtrBase, ConstOffset) =

4691 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

4692

4694 !isFlatScratchBaseLegal(Root.getReg())))

4696

4697 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();

4698 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))

4700

4701 return std::pair(PtrBase, ConstOffset);

4702}

4703

4705AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

4706 auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

4707

4708 return {{

4711 }};

4712}

4713

4715AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

4717

4718 return {{

4721 }};

4722}

4723

4725AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {

4727

4728 return {{

4731 }};

4732}

4733

4734

4736AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {

4739 int64_t ConstOffset;

4740 int64_t ImmOffset = 0;

4741

4742

4743

4744 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

4745

4746 if (ConstOffset != 0) {

4749 Addr = PtrBase;

4750 ImmOffset = ConstOffset;

4751 } else {

4753 if (isSGPR(PtrBaseDef->Reg)) {

4754 if (ConstOffset > 0) {

4755

4756

4757

4758

4759

4760 int64_t SplitImmOffset, RemainderOffset;

4761 std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(

4763

4764 if (isUInt<32>(RemainderOffset)) {

4768 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

4769

4770 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

4771 HighBits)

4772 .addImm(RemainderOffset);

4773

4774 return {{

4777 MIB.addReg(HighBits);

4778 },

4780 }};

4781 }

4782 }

4783

4784

4785

4786

4787

4788

4789 unsigned NumLiterals =

4793 return std::nullopt;

4794 }

4795 }

4796 }

4797

4798

4800 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

4801

4804

4805 if (isSGPR(SAddr)) {

4806 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

4807

4808

4809

4813 },

4815 MIB.addReg(VOffset);

4816 },

4818 MIB.addImm(ImmOffset);

4819 }}};

4820 }

4821 }

4822 }

4823

4824

4825

4826 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||

4827 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))

4828 return std::nullopt;

4829

4830

4831

4834 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

4835

4836 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

4838

4839 return {{

4843 }};

4844}

4845

4847AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {

4850 int64_t ConstOffset;

4851 int64_t ImmOffset = 0;

4852

4853

4854

4855 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

4856

4857 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&

4860 Addr = PtrBase;

4861 ImmOffset = ConstOffset;

4862 }

4863

4865 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {

4866 int FI = AddrDef->MI->getOperand(1).getIndex();

4867 return {{

4870 }};

4871 }

4872

4873 Register SAddr = AddrDef->Reg;

4874

4875 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

4876 Register LHS = AddrDef->MI->getOperand(1).getReg();

4877 Register RHS = AddrDef->MI->getOperand(2).getReg();

4880

4881 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&

4882 isSGPR(RHSDef->Reg)) {

4883 int FI = LHSDef->MI->getOperand(1).getIndex();

4887 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

4888

4889 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

4891 .addReg(RHSDef->Reg)

4893 }

4894 }

4895

4896 if (!isSGPR(SAddr))

4897 return std::nullopt;

4898

4899 return {{

4902 }};

4903}

4904

4905
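// On subtargets with the flat-scratch SVS swizzle bug, the check below
// conservatively rejects SVS addressing when the low two bits of the VGPR
// and SGPR address components could sum to 4 or more.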

4906bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(

4909 return false;

4910

4911

4912

4913

4917 uint64_t VMax = VKnown.getMaxValue().getZExtValue();

4918 uint64_t SMax = SKnown.getMaxValue().getZExtValue();

4919 return (VMax & 3) + (SMax & 3) >= 4;

4920}

4921

4923AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {

4926 int64_t ConstOffset;

4927 int64_t ImmOffset = 0;

4928

4929

4930

4931 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

4932

4934 if (ConstOffset != 0 &&

4936 Addr = PtrBase;

4937 ImmOffset = ConstOffset;

4938 }

4939

4941 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)

4942 return std::nullopt;

4943

4944 Register RHS = AddrDef->MI->getOperand(2).getReg();

4945 if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)

4946 return std::nullopt;

4947

4948 Register LHS = AddrDef->MI->getOperand(1).getReg();

4950

4951 if (OrigAddr != Addr) {

4952 if (!isFlatScratchBaseLegalSVImm(OrigAddr))

4953 return std::nullopt;

4954 } else {

4955 if (!isFlatScratchBaseLegalSV(OrigAddr))

4956 return std::nullopt;

4957 }

4958

4959 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))

4960 return std::nullopt;

4961

4962 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {

4963 int FI = LHSDef->MI->getOperand(1).getIndex();

4964 return {{

4968 }};

4969 }

4970

4971 if (!isSGPR(LHS))

4972 return std::nullopt;

4973

4974 return {{

4978 }};

4979}

4980

4982AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {

4987

4991 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

4992

4993

4994

4996 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

4997 HighBits)

4999

5001 MIB.addReg(Info->getScratchRSrcReg());

5002 },

5004 MIB.addReg(HighBits);

5005 },

5007

5008

5010 },

5013 }}};

5014 }

5015

5017

5018

5019

5020 std::optional<int> FI;

5022

5025 int64_t ConstOffset;

5026 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);

5027 if (ConstOffset != 0) {

5031 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);

5032 if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)

5034 else

5035 VAddr = PtrBase;

5036 Offset = ConstOffset;

5037 }

5038 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {

5040 }

5041

5043 MIB.addReg(Info->getScratchRSrcReg());

5044 },

5046 if (FI)

5048 else

5050 },

5052

5053

5055 },

5058 }}};

5059}

5060

5061bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,

5062 int64_t Offset) const {

5063 if (!isUInt<16>(Offset))

5064 return false;

5065

5067 return true;

5068

5069

5070

5072}

5073

5074bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,

5075 int64_t Offset1,

5076 unsigned Size) const {

5077 if (Offset0 % Size != 0 || Offset1 % Size != 0)

5078 return false;

5079 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))

5080 return false;

5081

5083 return true;

5084

5085

5086

5088}

5089

5090

5092 return Addr->getOpcode() == TargetOpcode::G_OR ||

5093 (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

5095}

5096

5097

5098

5099

5100bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {

5102

5104 return true;

5105

5106

5107

5109 return true;

5110

5113

5114 if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {

5115 std::optional<ValueAndVReg> RhsValReg =

5117

5118

5119

5120

5121 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&

5122 RhsValReg->Value.getSExtValue() > -0x40000000)

5123 return true;

5124 }

5125

5127}

5128

5129

5130

5131bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {

5133

5135 return true;

5136

5137

5138

5140 return true;

5141

5145}

5146

5147

5148

5149bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(

5151

5152

5154 return true;

5155

5158 std::optional<DefinitionAndSourceRegister> BaseDef =

5160 std::optional<ValueAndVReg> RHSOffset =

5163

5164

5165

5166

5167

5170 (RHSOffset->Value.getSExtValue() < 0 &&

5171 RHSOffset->Value.getSExtValue() > -0x40000000)))

5172 return true;

5173

5174 Register LHS = BaseDef->MI->getOperand(1).getReg();

5175 Register RHS = BaseDef->MI->getOperand(2).getReg();

5177}

5178

5179bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,

5180 unsigned ShAmtBits) const {

5181 assert(MI.getOpcode() == TargetOpcode::G_AND);

5182

5183 std::optional<APInt> RHS =

5185 if (!RHS)

5186 return false;

5187

5188 if (RHS->countr_one() >= ShAmtBits)

5189 return true;

5190

5192 return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;

5193}

5194

5196AMDGPUInstructionSelector::selectMUBUFScratchOffset(

5200

5201 std::optional<DefinitionAndSourceRegister> Def =

5203 assert(Def && "this shouldn't be an optional result");

5205

5207 return {{

5209 MIB.addReg(Info->getScratchRSrcReg());

5210 },

5212 MIB.addReg(WaveBase);

5213 },

5215 }};

5216 }

5217

5219

5220

5226 return {};

5229 if (!WaveBase)

5230 return {};

5231

5232 return {{

5234 MIB.addReg(Info->getScratchRSrcReg());

5235 },

5237 MIB.addReg(WaveBase);

5238 },

5240 }};

5241 }

5242

5245 return {};

5246

5247 return {{

5249 MIB.addReg(Info->getScratchRSrcReg());

5250 },

5253 },

5255 }};

5256}

5257
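// DS addressing helpers: a constant pointer offset is folded into the 16-bit
// instruction offset field when it is in range; otherwise the untouched
// pointer is used with a zero offset.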

5258std::pair<Register, unsigned>

5259AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {

5261 int64_t ConstAddr = 0;

5262

5265 std::tie(PtrBase, Offset) =

5266 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

5267

5269 if (isDSOffsetLegal(PtrBase, Offset)) {

5270

5271 return std::pair(PtrBase, Offset);

5272 }

5273 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

5274

5275

5276

5278

5279

5280 }

5281

5282 return std::pair(Root.getReg(), 0);

5283}

5284

5286AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {

5289 std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

5290 return {{

5293 }};

5294}

5295

5297AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {

5298 return selectDSReadWrite2(Root, 4);

5299}

5300

5302AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {

5303 return selectDSReadWrite2(Root, 8);

5304}

5305

5307AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,

5308 unsigned Size) const {

5311 std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

5312 return {{

5316 }};

5317}

5318

5319std::pair<Register, unsigned>

5320AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,

5321 unsigned Size) const {

5323 int64_t ConstAddr = 0;

5324

5327 std::tie(PtrBase, Offset) =

5328 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

5329

5331 int64_t OffsetValue0 = Offset;

5332 int64_t OffsetValue1 = Offset + Size;

5333 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {

5334

5335 return std::pair(PtrBase, OffsetValue0 / Size);

5336 }

5337 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

5338

5339

5341

5342

5343 }

5344

5345 return std::pair(Root.getReg(), 0);

5346}

5347

5348

5349

5350

5351

5352std::pair<Register, int64_t>

5353AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(

5356 if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)

5357 return {Root, 0};

5358

5360 std::optional<ValueAndVReg> MaybeOffset =

5362 if (!MaybeOffset)

5363 return {Root, 0};

5364 return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};

5365}

5366

5369}

5370

5371

5372

5376 Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

5377 Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

5378 Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

5379 Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

5380

5381 B.buildInstr(AMDGPU::S_MOV_B32)

5382 .addDef(RSrc2)

5383 .addImm(FormatLo);

5384 B.buildInstr(AMDGPU::S_MOV_B32)

5385 .addDef(RSrc3)

5386 .addImm(FormatHi);

5387

5388

5389

5390

5391 B.buildInstr(AMDGPU::REG_SEQUENCE)

5392 .addDef(RSrcHi)

5393 .addReg(RSrc2)

5394 .addImm(AMDGPU::sub0)

5395 .addReg(RSrc3)

5396 .addImm(AMDGPU::sub1);

5397

5399 if (!BasePtr) {

5400 RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

5401 B.buildInstr(AMDGPU::S_MOV_B64)

5402 .addDef(RSrcLo)

5403 .addImm(0);

5404 }

5405

5406 B.buildInstr(AMDGPU::REG_SEQUENCE)

5407 .addDef(RSrc)

5408 .addReg(RSrcLo)

5409 .addImm(AMDGPU::sub0_sub1)

5410 .addReg(RSrcHi)

5411 .addImm(AMDGPU::sub2_sub3);

5412

5413 return RSrc;

5414}

5415

5418 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5419

5420

5421

5423}

5424

5427 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5428

5429

5430

5432}

5433

5434AMDGPUInstructionSelector::MUBUFAddressData

5435AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {

5436 MUBUFAddressData Data;

5437 Data.N0 = Src;

5438

5441

5442 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);

5443 if (isUInt<32>(Offset)) {

5444 Data.N0 = PtrBase;

5446 }

5447

5450 Data.N2 = InputAdd->getOperand(1).getReg();

5451 Data.N3 = InputAdd->getOperand(2).getReg();

5452

5453

5454

5455

5456

5457

5460 }

5461

5462 return Data;

5463}

5464

5465

5466bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {

5467

5468

5469 if (Addr.N2)

5470 return true;

5471

5473 return N0Bank->getID() == AMDGPU::VGPRRegBankID;

5474}

5475

5476

5477

5478

5479void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(

5482 return;

5483

5484

5485 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5486 B.buildInstr(AMDGPU::S_MOV_B32)

5487 .addDef(SOffset)

5488 .addImm(ImmOffset);

5489 ImmOffset = 0;

5490}

5491

5492bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(

5495

5496

5498 return false;

5499

5500 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());

5501 if (!shouldUseAddr64(AddrData))

5502 return false;

5503

5507 Offset = AddrData.Offset;

5508

5509

5511

5512 if (N2) {

5513 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5515 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5516

5517

5518 VAddr = N0;

5519 } else {

5520 SRDPtr = N3;

5521 VAddr = N2;

5522 }

5523 } else {

5524

5525 SRDPtr = N2;

5526 VAddr = N3;

5527 }

5528 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

5529

5530 VAddr = N0;

5531 } else {

5532

5533

5534 SRDPtr = N0;

5535 }

5536

5539 splitIllegalMUBUFOffset(B, SOffset, Offset);

5540 return true;

5541}

5542

5543bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(

5545 int64_t &Offset) const {

5546

5547

5549 return false;

5550

5551 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());

5552 if (shouldUseAddr64(AddrData))

5553 return false;

5554

5555

5556

5557 Register SRDPtr = AddrData.N0;

5558 Offset = AddrData.Offset;

5559

5560

5562

5564 splitIllegalMUBUFOffset(B, SOffset, Offset);

5565 return true;

5566}

5567

5569AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {

5574

5575 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))

5576 return {};

5577

5578

5579

5580 return {{

5582 MIB.addReg(RSrcReg);

5583 },

5586 },

5588 if (SOffset)

5589 MIB.addReg(SOffset);

5591 MIB.addReg(AMDGPU::SGPR_NULL);

5592 else

5594 },

5597 },

5601 }};

5602}

5603

5605AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {

5609

5610 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))

5611 return {};

5612

5613 return {{

5615 MIB.addReg(RSrcReg);

5616 },

5618 if (SOffset)

5619 MIB.addReg(SOffset);

5621 MIB.addReg(AMDGPU::SGPR_NULL);

5622 else

5624 },

5629 }};

5630}

5631

5633AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {

5634

5636

5638 SOffset = AMDGPU::SGPR_NULL;

5639

5641}

5642

5643

5644static std::optional<uint64_t>

5646

5648 if (!OffsetVal || !isInt<32>(*OffsetVal))

5649 return std::nullopt;

5650 return Lo_32(*OffsetVal);

5651}

5652

5654AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {

5655 std::optional<uint64_t> OffsetVal =

5657 if (!OffsetVal)

5658 return {};

5659

5660 std::optional<int64_t> EncodedImm =

5662 if (!EncodedImm)

5663 return {};

5664

5666}

5667

5669AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {

5671

5673 if (!OffsetVal)

5674 return {};

5675

5676 std::optional<int64_t> EncodedImm =

5678 if (!EncodedImm)

5679 return {};

5680

5682}

5683

5685AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {

5686

5687

5691 *MRI, Root.getReg(), KB, true);

5692 if (!SOffset)

5693 return std::nullopt;

5694

5695 std::optional<int64_t> EncodedOffset =

5697 if (!EncodedOffset)

5698 return std::nullopt;

5699

5703}

5704

5705std::pair<Register, unsigned>

5706AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,

5707 bool &Matched) const {

5708 Matched = false;

5709

5711 unsigned Mods;

5712 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

5713

5716

5717

5718

5719

5721

5722 const auto CheckAbsNeg = [&]() {

5723

5724

5726 unsigned ModsTmp;

5727 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);

5728

5731

5734 }

5735 };

5736

5737 CheckAbsNeg();

5738

5739

5740

5741

5742

5743

5745

5748 CheckAbsNeg();

5749 }

5750

5751 Matched = true;

5752 }

5753

5754 return {Src, Mods};

5755}

5756

5758AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(

5761 unsigned Mods;

5762 bool Matched;

5763 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

5764 if (!Matched)

5765 return {};

5766

5767 return {{

5770 }};

5771}

5772

5774AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {

5776 unsigned Mods;

5777 bool Matched;

5778 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

5779

5780 return {{

5783 }};

5784}

5785

5786bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(

5790 Register CCReg = I.getOperand(0).getReg();

5791

5792 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))

5793 .addImm(I.getOperand(2).getImm());

5794

5795 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

5796

5797 I.eraseFromParent();

5799 *MRI);

5800}

5801

5802bool AMDGPUInstructionSelector::selectSGetBarrierState(

5807 std::optional<int64_t> BarValImm =

5809

5810 if (!BarValImm) {

5811 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

5814 }

5816 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM

5817 : AMDGPU::S_GET_BARRIER_STATE_M0;

5819

5820 auto DstReg = I.getOperand(0).getReg();

5822 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

5824 return false;

5826 if (BarValImm) {

5827 MIB.addImm(*BarValImm);

5828 }

5829 I.eraseFromParent();

5830 return true;

5831}

5832

5833unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {

5834 if (HasInlineConst) {

5835 switch (IntrID) {

5836 default:

5838 case Intrinsic::amdgcn_s_barrier_join:

5839 return AMDGPU::S_BARRIER_JOIN_IMM;

5840 case Intrinsic::amdgcn_s_get_named_barrier_state:

5841 return AMDGPU::S_GET_BARRIER_STATE_IMM;

5842 };

5843 } else {

5844 switch (IntrID) {

5845 default:

5847 case Intrinsic::amdgcn_s_barrier_join:

5848 return AMDGPU::S_BARRIER_JOIN_M0;

5849 case Intrinsic::amdgcn_s_get_named_barrier_state:

5850 return AMDGPU::S_GET_BARRIER_STATE_M0;

5851 };

5852 }

5853}

5854

5855bool AMDGPUInstructionSelector::selectNamedBarrierInit(

5861

5862

5863 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5864 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)

5865 .add(BarOp)

5868

5869 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5870 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)

5874

5875

5876 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5877 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg2)

5878 .add(CntOp)

5881

5882 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5883 constexpr unsigned ShAmt = 16;

5884 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg3)

5888

5889 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5890 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg4)

5894

5895 auto CopyMIB =

5896 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);

5898

5899 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init

5900 ? AMDGPU::S_BARRIER_INIT_M0

5901 : AMDGPU::S_BARRIER_SIGNAL_M0;

5904

5905 I.eraseFromParent();

5906 return true;

5907}
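
The S_LSHR/S_AND/S_LSHL/S_OR chain above packs the barrier operand and member count into M0. The mask immediates did not survive in this listing, but given the surviving ShAmt = 16 and the BarId computation further down ((imm >> 4) & 0x3F), the packing is roughly the following; the field widths are an assumption, not taken from the source:

#include <cstdint>

// Rough scalar equivalent of the shift/and/or sequence that feeds M0.
// Both fields are assumed to be 6 bits wide; the real mask constants were
// lost when this listing was extracted.
uint32_t encodeNamedBarrierM0(uint32_t BarVal, uint32_t MemberCnt) {
  uint32_t BarId = (BarVal >> 4) & 0x3F;   // matches the BarId extraction below
  uint32_t Cnt   = MemberCnt & 0x3F;       // assumed width
  return BarId | (Cnt << 16);              // ShAmt = 16 as in the source
}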

5908

5909bool AMDGPUInstructionSelector::selectNamedBarrierInst(

5913 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state

5914 ? I.getOperand(2)

5915 : I.getOperand(1);

5916 std::optional<int64_t> BarValImm =

5918

5919 if (!BarValImm) {

5920

5921 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5922 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)

5926

5927 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

5928 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)

5932

5933 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

5936 }

5937

5939 unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);

5941

5942 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {

5943 auto DstReg = I.getOperand(0).getReg();

5945 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

5947 return false;

5949 }

5950

5951 if (BarValImm) {

5952 auto BarId = ((*BarValImm) >> 4) & 0x3F;

5954 }

5955

5956 I.eraseFromParent();

5957 return true;

5958}

5959

5960void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,

5962 int OpIdx) const {

5963 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

5964 "Expected G_CONSTANT");

5965 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

5966}

5967

5968void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,

5970 int OpIdx) const {

5971 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

5972 "Expected G_CONSTANT");

5973 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

5974}

5975

5976void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,

5978 int OpIdx) const {

5980 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);

5981 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

5982}

5983

5984void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,

5986 int OpIdx) const {

5987 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

5988 "Expected G_CONSTANT");

5989 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

5990}

5991

5992

5993

5994void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,

5996 int OpIdx) const {

5998 int64_t Imm;

6001 else

6003}

6004

6005void AMDGPUInstructionSelector::renderZextBoolTImm(MachineInstrBuilder &MIB,

6007 int OpIdx) const {

6008 MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);

6009}

6010

6011void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,

6013 int OpIdx) const {

6014 assert(OpIdx >= 0 && "expected to match an immediate operand");

6016}

6017

6018void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(

6020 assert(OpIdx >= 0 && "expected to match an immediate operand");

6023}

6024

6025void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(

6027 assert(OpIdx >= 0 && "expected to match an immediate operand");

6028 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)

6031}

6032

6033void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(

6035 assert(OpIdx >= 0 && "expected to match an immediate operand");

6038}

6039

6040void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(

6042 assert(OpIdx >= 0 && "expected to match an immediate operand");

6043 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)

6045 : 0);

6046}

6047

6048void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(

6050 assert(OpIdx >= 0 && "expected to match an immediate operand");

6052 : 0);

6053}

6054

6055void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(

6057 assert(OpIdx >= 0 && "expected to match an immediate operand");

6059 : 0);

6060}

6061

6062void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(

6064 assert(OpIdx >= 0 && "expected to match an immediate operand");

6067}

6068

6069void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(

6071 assert(OpIdx >= 0 && "expected to match an immediate operand");

6074}

6075

6076void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,

6078 int OpIdx) const {

6079 assert(OpIdx >= 0 && "expected to match an immediate operand");

6080 MIB.addImm(MI.getOperand(OpIdx).getImm() &

6083}

6084

6085void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,

6087 int OpIdx) const {

6088 assert(OpIdx >= 0 && "expected to match an immediate operand");

6089 const bool Swizzle = MI.getOperand(OpIdx).getImm() &

6093}

6094

6095void AMDGPUInstructionSelector::renderExtractCpolSetGLC(

6097 assert(OpIdx >= 0 && "expected to match an immediate operand");

6098 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

6102}

6103

6104void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,

6106 int OpIdx) const {

6108}

6109

6110void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,

6112 int OpIdx) const {

6113 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();

6114 int ExpVal = APF.getExactLog2Abs();

6115 assert(ExpVal != INT_MIN);

6116 MIB.addImm(ExpVal);

6117}
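
The renderer above emits the exponent of a power-of-two FP immediate rather than the value itself (via APFloat::getExactLog2Abs); a plain-C++ analogue of that conversion, with the same "must be an exact power of two" expectation:

#include <cassert>
#include <cmath>

// For a value known to be +/- 2^k, return k.
int exactLog2Abs(double PowerOfTwo) {
  int Exp = std::ilogb(std::fabs(PowerOfTwo));
  assert(std::ldexp(1.0, Exp) == std::fabs(PowerOfTwo) && "not a power of two");
  return Exp;
}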

6118

6119void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,

6121 int OpIdx) const {

6122

6123

6124

6125

6126 MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);

6127}
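
The (Imm + 3) % 4 remap above rotates the incoming rounding-mode encoding: 0 becomes 3 and every other value shifts down by one. The original explanatory comment was lost from this listing; the arithmetic itself is easy to check:

// 0 -> 3, 1 -> 0, 2 -> 1, 3 -> 2: the instruction field wants the mode rotated
// relative to the incoming encoding.
constexpr unsigned remapRoundMode(unsigned Imm) { return (Imm + 3) % 4; }

static_assert(remapRoundMode(0) == 3 && remapRoundMode(1) == 0 &&
              remapRoundMode(2) == 1 && remapRoundMode(3) == 2);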

6128

6129

6130void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(

6132 unsigned Val = MI.getOperand(OpIdx).getImm();

6133 unsigned New = 0;

6134 if (Val & 0x1)

6136 if (Val & 0x2)

6139}

6140

6141bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {

6143}

6144

6145bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {

6147}
