LLVM: lib/Target/AMDGPU/SIPeepholeSDWA.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

29#include

30

31using namespace llvm;

32

33#define DEBUG_TYPE "si-peephole-sdwa"

34

35STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");

37 "Number of instruction converted to SDWA.");

38

39namespace {

40

43class SDWAOperand;

44class SDWADstOperand;

45

48

49class SIPeepholeSDWA {

50private:

54

56 SDWAOperandsMap PotentialMatches;

58

59 std::optional<int64_t> foldToImm(const MachineOperand &Op) const;

60

62 std::unique_ptr matchSDWAOperand(MachineInstr &MI);

67 bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);

69

70public:

72};

73

75public:

76 static char ID;

77

78 SIPeepholeSDWALegacy() : MachineFunctionPass(ID) {}

79

80 StringRef getPassName() const override { return "SI Peephole SDWA"; }

81

82 bool runOnMachineFunction(MachineFunction &MF) override;

83

84 void getAnalysisUsage(AnalysisUsage &AU) const override {

87 }

88};

89

91

92class SDWAOperand {

93private:

94 MachineOperand *Target;

95 MachineOperand *Replaced;

96

97

98

99 virtual bool canCombineSelections(const MachineInstr &MI,

100 const SIInstrInfo *TII) = 0;

101

102public:

103 SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)

104 : Target(TargetOp), Replaced(ReplacedOp) {

105 assert(Target->isReg());

106 assert(Replaced->isReg());

107 }

108

109 virtual ~SDWAOperand() = default;

110

111 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII,

112 const GCNSubtarget &ST,

113 SDWAOperandsMap *PotentialMatches = nullptr) = 0;

114 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

115

116 MachineOperand *getTargetOperand() const { return Target; }

117 MachineOperand *getReplacedOperand() const { return Replaced; }

118 MachineInstr *getParentInst() const { return Target->getParent(); }

119

120 MachineRegisterInfo *getMRI() const {

121 return &getParentInst()->getMF()->getRegInfo();

122 }

123

124#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

125 virtual void print(raw_ostream& OS) const = 0;

127#endif

128};

129

130class SDWASrcOperand : public SDWAOperand {

131private:

133 bool Abs;

134 bool Neg;

135 bool Sext;

136

137public:

138 SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,

139 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,

140 bool Sext_ = false)

141 : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),

142 Neg(Neg_), Sext(Sext_) {}

143

144 MachineInstr *potentialToConvert(const SIInstrInfo *TII,

145 const GCNSubtarget &ST,

146 SDWAOperandsMap *PotentialMatches = nullptr) override;

147 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

148 bool canCombineSelections(const MachineInstr &MI,

149 const SIInstrInfo *TII) override;

150

151 SdwaSel getSrcSel() const { return SrcSel; }

152 bool getAbs() const { return Abs; }

153 bool getNeg() const { return Neg; }

154 bool getSext() const { return Sext; }

155

156 uint64_t getSrcMods(const SIInstrInfo *TII,

157 const MachineOperand *SrcOp) const;

158

159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

160 void print(raw_ostream& OS) const override;

161#endif

162};

163

164class SDWADstOperand : public SDWAOperand {

165private:

168

169public:

170 SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,

172 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

173

174 MachineInstr *potentialToConvert(const SIInstrInfo *TII,

175 const GCNSubtarget &ST,

176 SDWAOperandsMap *PotentialMatches = nullptr) override;

177 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

178 bool canCombineSelections(const MachineInstr &MI,

179 const SIInstrInfo *TII) override;

180

181 SdwaSel getDstSel() const { return DstSel; }

182 DstUnused getDstUnused() const { return DstUn; }

183

184#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

185 void print(raw_ostream& OS) const override;

186#endif

187};

188

189class SDWADstPreserveOperand : public SDWADstOperand {

190private:

191 MachineOperand *Preserve;

192

193public:

194 SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,

195 MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)

196 : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),

197 Preserve(PreserveOp) {}

198

199 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

200 bool canCombineSelections(const MachineInstr &MI,

201 const SIInstrInfo *TII) override;

202

203 MachineOperand *getPreservedOperand() const { return Preserve; }

204

205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

206 void print(raw_ostream& OS) const override;

207#endif

208};

209

210}

211

213 false)

214

215char SIPeepholeSDWALegacy::ID = 0;

216

218

220 return new SIPeepholeSDWALegacy();

221}

222

223#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

225 switch(Sel) {

226 case BYTE_0: OS << "BYTE_0"; break;

227 case BYTE_1: OS << "BYTE_1"; break;

228 case BYTE_2: OS << "BYTE_2"; break;

229 case BYTE_3: OS << "BYTE_3"; break;

230 case WORD_0: OS << "WORD_0"; break;

231 case WORD_1: OS << "WORD_1"; break;

232 case DWORD: OS << "DWORD"; break;

233 }

234 return OS;

235}

236

238 switch(Un) {

239 case UNUSED_PAD: OS << "UNUSED_PAD"; break;

240 case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;

242 }

243 return OS;

244}

245

247void SDWASrcOperand::print(raw_ostream& OS) const {

248 OS << "SDWA src: " << *getTargetOperand()

249 << " src_sel:" << getSrcSel()

250 << " abs:" << getAbs() << " neg:" << getNeg()

251 << " sext:" << getSext() << '\n';

252}

253

255void SDWADstOperand::print(raw_ostream& OS) const {

256 OS << "SDWA dst: " << *getTargetOperand()

257 << " dst_sel:" << getDstSel()

258 << " dst_unused:" << getDstUnused() << '\n';

259}

260

262void SDWADstPreserveOperand::print(raw_ostream& OS) const {

263 OS << "SDWA preserve dst: " << *getTargetOperand()

264 << " dst_sel:" << getDstSel()

265 << " preserve:" << *getPreservedOperand() << '\n';

266}

267

268#endif

269

275 if (To.isUse()) {

277 } else {

279 }

280}

281

283 return LHS.isReg() &&

284 RHS.isReg() &&

285 LHS.getReg() == RHS.getReg() &&

286 LHS.getSubReg() == RHS.getSubReg();

287}

288

291 if (Reg->isReg() || Reg->isDef())

292 return nullptr;

293

294 return MRI->getOneNonDBGUse(Reg->getReg());

295}

296

299 if (Reg->isReg())

300 return nullptr;

301

302 return MRI->getOneDef(Reg->getReg());

303}

304

305

306

307

308

309

310

312 if (Sel == SdwaSel::DWORD)

313 return OperandSel;

314

315 if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)

316 return Sel;

317

318 if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||

319 Sel == SdwaSel::BYTE_3)

320 return {};

321

322 if (OperandSel == SdwaSel::WORD_0)

323 return Sel;

324

325 if (OperandSel == SdwaSel::WORD_1) {

326 if (Sel == SdwaSel::BYTE_0)

327 return SdwaSel::BYTE_2;

328 if (Sel == SdwaSel::BYTE_1)

329 return SdwaSel::BYTE_3;

330 if (Sel == SdwaSel::WORD_0)

331 return SdwaSel::WORD_1;

332 }

333

334 return {};

335}

336

337uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,

338 const MachineOperand *SrcOp) const {

339 uint64_t Mods = 0;

341 if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {

342 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {

343 Mods = Mod->getImm();

344 }

345 } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {

346 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {

347 Mods = Mod->getImm();

348 }

349 }

350 if (Abs || Neg) {

352 "Float and integer src modifiers can't be set simultaneously");

355 } else if (Sext) {

357 }

358

359 return Mods;

360}

361

362MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,

363 const GCNSubtarget &ST,

364 SDWAOperandsMap *PotentialMatches) {

365 if (PotentialMatches != nullptr) {

366

367 MachineOperand *Reg = getReplacedOperand();

368 if (Reg->isReg() || Reg->isDef())

369 return nullptr;

370

371 for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg()))

372

373 if (!isConvertibleToSDWA(UseMI, ST, TII) ||

374 !canCombineSelections(UseMI, TII))

375 return nullptr;

376

377

378

379 for (MachineOperand &UseMO : getMRI()->use_nodbg_operands(Reg->getReg())) {

380

382

383 SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;

384 MachineInstr *UseMI = UseMO.getParent();

385 potentialMatchesMap[UseMI].push_back(this);

386 }

387 return nullptr;

388 }

389

390

391

392 MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());

393 if (!PotentialMO)

394 return nullptr;

395

396 MachineInstr *Parent = PotentialMO->getParent();

397

398 return canCombineSelections(*Parent, TII) ? Parent : nullptr;

399}

400

401bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {

402 switch (MI.getOpcode()) {

403 case AMDGPU::V_CVT_F32_FP8_sdwa:

404 case AMDGPU::V_CVT_F32_BF8_sdwa:

405 case AMDGPU::V_CVT_PK_F32_FP8_sdwa:

406 case AMDGPU::V_CVT_PK_F32_BF8_sdwa:

407

408 return false;

409 case AMDGPU::V_CNDMASK_B32_sdwa:

410

411

412

413

414

415

416

417

418

419

420

421 if (Sext)

422 return false;

423 break;

424 }

425

426

427

428 bool IsPreserveSrc = false;

429 MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

430 MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);

431 MachineOperand *SrcMods =

432 TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);

433 assert(Src && (Src->isReg() || Src->isImm()));

434 if (isSameReg(*Src, *getReplacedOperand())) {

435

436 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

437 SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);

438 SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

439

440 if (!Src ||

441 isSameReg(*Src, *getReplacedOperand())) {

442

443

444

445

446

447

448 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

450 TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);

451

452 if (Dst &&

453 DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {

454

455

456

457

459 TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));

460 if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&

461 getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {

462 IsPreserveSrc = true;

463 auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),

464 AMDGPU::OpName::vdst);

465 auto TiedIdx = MI.findTiedOperandIdx(DstIdx);

466 Src = &MI.getOperand(TiedIdx);

467 SrcSel = nullptr;

468 SrcMods = nullptr;

469 } else {

470

471 return false;

472 }

473 }

474 }

475 assert(Src && Src->isReg());

476

477 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||

478 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||

479 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||

480 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&

481 isSameReg(*Src, *getReplacedOperand())) {

482

483

484 return false;

485 }

486

488 (IsPreserveSrc || (SrcSel && SrcMods)));

489 }

491 if (!IsPreserveSrc) {

494 SrcMods->setImm(getSrcMods(TII, Src));

495 }

496 getTargetOperand()->setIsKill(false);

497 return true;

498}

499

500

501

503 AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {

505

508

510}

511

512

513

514

516 AMDGPU::OpName SrcOpName,

520

523 return true;

524

526}

527

528bool SDWASrcOperand::canCombineSelections(const MachineInstr &MI,

529 const SIInstrInfo *TII) {

530 if (TII->isSDWA(MI.getOpcode()))

531 return true;

532

533 using namespace AMDGPU;

534

536 getReplacedOperand(), getSrcSel()) &&

538 getReplacedOperand(), getSrcSel());

539}

540

541MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,

542 const GCNSubtarget &ST,

543 SDWAOperandsMap *PotentialMatches) {

544

545

546 MachineRegisterInfo *MRI = getMRI();

547 MachineInstr *ParentMI = getParentInst();

548

549 MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);

550 if (!PotentialMO)

551 return nullptr;

552

553

554 for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {

555 if (&UseInst != ParentMI)

556 return nullptr;

557 }

558

559 MachineInstr *Parent = PotentialMO->getParent();

560 return canCombineSelections(*Parent, TII) ? Parent : nullptr;

561}

562

563bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {

564

565

566 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||

567 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||

568 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||

569 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&

571

572 return false;

573 }

574

575 MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

577 Operand->isReg() &&

578 isSameReg(*Operand, *getReplacedOperand()));

580 MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);

582

585

586 MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);

588 DstUnused->setImm(getDstUnused());

589

590

591

592 getParentInst()->eraseFromParent();

593 return true;

594}

595

596bool SDWADstOperand::canCombineSelections(const MachineInstr &MI,

597 const SIInstrInfo *TII) {

598 if (TII->isSDWA(MI.getOpcode()))

599 return true;

600

602}

603

604bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,

605 const SIInstrInfo *TII) {

606

607

608

609 for (MachineOperand &MO : MI.uses()) {

610 if (!MO.isReg())

611 continue;

612 getMRI()->clearKillFlags(MO.getReg());

613 }

614

615

616 MI.getParent()->remove(&MI);

617 getParentInst()->getParent()->insert(getParentInst(), &MI);

618

619

620 MachineInstrBuilder MIB(*MI.getMF(), MI);

621 MIB.addReg(getPreservedOperand()->getReg(),

623 getPreservedOperand()->getSubReg());

624

625

626 MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),

627 MI.getNumOperands() - 1);

628

629

630 return SDWADstOperand::convertToSDWA(MI, TII);

631}

632

633bool SDWADstPreserveOperand::canCombineSelections(const MachineInstr &MI,

634 const SIInstrInfo *TII) {

635 return SDWADstOperand::canCombineSelections(MI, TII);

636}

637

638std::optional<int64_t>

639SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {

640 if (Op.isImm()) {

641 return Op.getImm();

642 }

643

644

645

646 if (Op.isReg()) {

647 for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {

649 continue;

650

651 const MachineInstr *DefInst = Def.getParent();

652 if (TII->isFoldableCopy(*DefInst))

653 return std::nullopt;

654

655 const MachineOperand &Copied = DefInst->getOperand(1);

656 if (!Copied.isImm())

657 return std::nullopt;

658

659 return Copied.getImm();

660 }

661 }

662

663 return std::nullopt;

664}

665

666std::unique_ptr

667SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {

668 unsigned Opcode = MI.getOpcode();

669 switch (Opcode) {

670 case AMDGPU::V_LSHRREV_B32_e32:

671 case AMDGPU::V_ASHRREV_I32_e32:

672 case AMDGPU::V_LSHLREV_B32_e32:

673 case AMDGPU::V_LSHRREV_B32_e64:

674 case AMDGPU::V_ASHRREV_I32_e64:

675 case AMDGPU::V_LSHLREV_B32_e64: {

676

677

678

679

680

681

682

683

684 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

685 auto Imm = foldToImm(*Src0);

686 if (!Imm)

687 break;

688

689 if (*Imm != 16 && *Imm != 24)

690 break;

691

692 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

693 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

695 Dst->getReg().isPhysical())

696 break;

697

698 if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||

699 Opcode == AMDGPU::V_LSHLREV_B32_e64) {

700 return std::make_unique(

702 }

703 return std::make_unique(

704 Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,

705 Opcode != AMDGPU::V_LSHRREV_B32_e32 &&

706 Opcode != AMDGPU::V_LSHRREV_B32_e64);

707 break;

708 }

709

710 case AMDGPU::V_LSHRREV_B16_e32:

711 case AMDGPU::V_ASHRREV_I16_e32:

712 case AMDGPU::V_LSHLREV_B16_e32:

713 case AMDGPU::V_LSHRREV_B16_e64:

714 case AMDGPU::V_LSHRREV_B16_opsel_e64:

715 case AMDGPU::V_ASHRREV_I16_e64:

716 case AMDGPU::V_LSHLREV_B16_opsel_e64:

717 case AMDGPU::V_LSHLREV_B16_e64: {

718

719

720

721

722

723

724

725

726 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

727 auto Imm = foldToImm(*Src0);

728 if (!Imm || *Imm != 8)

729 break;

730

731 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

732 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

733

735 Dst->getReg().isPhysical())

736 break;

737

738 if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||

739 Opcode == AMDGPU::V_LSHLREV_B16_opsel_e64 ||

740 Opcode == AMDGPU::V_LSHLREV_B16_e64)

741 return std::make_unique(Dst, Src1, BYTE_1, UNUSED_PAD);

742 return std::make_unique(

743 Src1, Dst, BYTE_1, false, false,

744 Opcode != AMDGPU::V_LSHRREV_B16_e32 &&

745 Opcode != AMDGPU::V_LSHRREV_B16_opsel_e64 &&

746 Opcode != AMDGPU::V_LSHRREV_B16_e64);

747 break;

748 }

749

750 case AMDGPU::V_BFE_I32_e64:

751 case AMDGPU::V_BFE_U32_e64: {

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

767 auto Offset = foldToImm(*Src1);

769 break;

770

771 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);

772 auto Width = foldToImm(*Src2);

773 if (!Width)

774 break;

775

777

778 if (*Offset == 0 && *Width == 8)

780 else if (*Offset == 0 && *Width == 16)

782 else if (*Offset == 0 && *Width == 32)

784 else if (*Offset == 8 && *Width == 8)

786 else if (*Offset == 16 && *Width == 8)

788 else if (*Offset == 16 && *Width == 16)

790 else if (*Offset == 24 && *Width == 8)

792 else

793 break;

794

795 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

796 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

797

799 Dst->getReg().isPhysical())

800 break;

801

802 return std::make_unique(

803 Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);

804 }

805

806 case AMDGPU::V_AND_B32_e32:

807 case AMDGPU::V_AND_B32_e64: {

808

809

810

811

812 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

813 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

814 auto *ValSrc = Src1;

815 auto Imm = foldToImm(*Src0);

816

817 if (!Imm) {

818 Imm = foldToImm(*Src1);

819 ValSrc = Src0;

820 }

821

822 if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))

823 break;

824

825 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

826

827 if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||

828 Dst->getReg().isPhysical())

829 break;

830

831 return std::make_unique(

832 ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);

833 }

834

835 case AMDGPU::V_OR_B32_e32:

836 case AMDGPU::V_OR_B32_e64: {

837

838

839

840

841

842

843

844

845

846 using CheckRetType =

847 std::optional<std::pair<MachineOperand *, MachineOperand *>>;

848 auto CheckOROperandsForSDWA =

849 [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {

850 if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())

851 return CheckRetType(std::nullopt);

852

854 if (!Op1Def)

855 return CheckRetType(std::nullopt);

856

857 MachineInstr *Op1Inst = Op1Def->getParent();

858 if (TII->isSDWA(*Op1Inst))

859 return CheckRetType(std::nullopt);

860

862 if (!Op2Def)

863 return CheckRetType(std::nullopt);

864

865 return CheckRetType(std::pair(Op1Def, Op2Def));

866 };

867

868 MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

869 MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

870 assert(OrSDWA && OrOther);

871 auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);

872 if (!Res) {

873 OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

874 OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

875 assert(OrSDWA && OrOther);

876 Res = CheckOROperandsForSDWA(OrSDWA, OrOther);

877 if (!Res)

878 break;

879 }

880

881 MachineOperand *OrSDWADef = Res->first;

882 MachineOperand *OrOtherDef = Res->second;

883 assert(OrSDWADef && OrOtherDef);

884

885 MachineInstr *SDWAInst = OrSDWADef->getParent();

886 MachineInstr *OtherInst = OrOtherDef->getParent();

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908 if (TII->isSDWA(*OtherInst))

909 break;

910

912 TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));

914 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));

915

916 bool DstSelAgree = false;

917 switch (DstSel) {

918 case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||

919 (OtherDstSel == BYTE_3) ||

920 (OtherDstSel == WORD_1));

921 break;

922 case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||

923 (OtherDstSel == BYTE_1) ||

924 (OtherDstSel == WORD_0));

925 break;

926 case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||

927 (OtherDstSel == BYTE_2) ||

928 (OtherDstSel == BYTE_3) ||

929 (OtherDstSel == WORD_1));

930 break;

931 case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||

932 (OtherDstSel == BYTE_2) ||

933 (OtherDstSel == BYTE_3) ||

934 (OtherDstSel == WORD_1));

935 break;

936 case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||

937 (OtherDstSel == BYTE_1) ||

938 (OtherDstSel == BYTE_3) ||

939 (OtherDstSel == WORD_0));

940 break;

941 case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||

942 (OtherDstSel == BYTE_1) ||

943 (OtherDstSel == BYTE_2) ||

944 (OtherDstSel == WORD_0));

945 break;

946 default: DstSelAgree = false;

947 }

948

949 if (!DstSelAgree)

950 break;

951

952

954 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));

955 if (OtherDstUnused != DstUnused::UNUSED_PAD)

956 break;

957

958

959 MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

961

962 return std::make_unique(

963 OrDst, OrSDWADef, OrOtherDef, DstSel);

964

965 }

966 }

967

968 return std::unique_ptr(nullptr);

969}

970

971#if !defined(NDEBUG)

973 Operand.print(OS);

974 return OS;

975}

976#endif

977

978void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {

979 for (MachineInstr &MI : MBB) {

980 if (auto Operand = matchSDWAOperand(MI)) {

981 LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');

982 SDWAOperands[&MI] = std::move(Operand);

983 ++NumSDWAPatternsFound;

984 }

985 }

986}

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005

1006void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,

1007 const GCNSubtarget &ST) const {

1008 int Opc = MI.getOpcode();

1009 assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&

1010 "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");

1011

1012

1013 if (TII->canShrink(MI, *MRI))

1014 return;

1016

1017 const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);

1018 if (!Sdst)

1019 return;

1021 if (!NextOp)

1022 return;

1023 MachineInstr &MISucc = *NextOp->getParent();

1024

1025

1026 MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);

1027 if (!CarryIn)

1028 return;

1029 MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);

1030 if (!CarryOut)

1031 return;

1032 if (MRI->hasOneNonDBGUse(CarryIn->getReg()) ||

1033 MRI->use_nodbg_empty(CarryOut->getReg()))

1034 return;

1035

1036 MachineBasicBlock &MBB = *MI.getParent();

1040 return;

1041

1042 for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();

1043 I != E; ++I) {

1044 if (I->modifiesRegister(AMDGPU::VCC, TRI))

1045 return;

1046 }

1047

1048

1050 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))

1051 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))

1052 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))

1054

1055 MI.eraseFromParent();

1056

1057

1058

1060}

1061

1062

1063

1064

1065

1066

1067void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,

1068 const GCNSubtarget &ST) const {

1069 assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);

1070

1072 if (TII->canShrink(MI, *MRI)) {

1073 LLVM_DEBUG(dbgs() << "Cannot shrink instruction\n");

1074 return;

1075 }

1076

1077 const MachineOperand &CarryIn =

1078 *TII->getNamedOperand(MI, AMDGPU::OpName::src2);

1080 MachineInstr *CarryDef = MRI->getVRegDef(CarryReg);

1081 if (!CarryDef) {

1082 LLVM_DEBUG(dbgs() << "Missing carry-in operand definition\n");

1083 return;

1084 }

1085

1086

1087 MCRegister Vcc = TRI->getVCC();

1088 MachineBasicBlock &MBB = *MI.getParent();

1092 LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");

1093 return;

1094 }

1095

1097

1100 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))

1101 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))

1102 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))

1104 TII->fixImplicitOperands(*Converted);

1105 LLVM_DEBUG(dbgs() << "Converted to VOP2: " << *Converted);

1106 (void)Converted;

1107 MI.eraseFromParent();

1108}

1109

1110namespace {

1111bool isConvertibleToSDWA(MachineInstr &MI,

1112 const GCNSubtarget &ST,

1113 const SIInstrInfo* TII) {

1114

1115 unsigned Opc = MI.getOpcode();

1116 if (TII->isSDWA(Opc))

1117 return true;

1118

1119

1120

1121 if (Opc == AMDGPU::V_CNDMASK_B32_e64)

1122 return false;

1123

1124

1127

1129 return false;

1130

1131 if (ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

1132 return false;

1133

1134 if (TII->isVOPC(Opc)) {

1135 if (ST.hasSDWASdst()) {

1136 const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);

1137 if (SDst && (SDst->getReg() != AMDGPU::VCC &&

1138 SDst->getReg() != AMDGPU::VCC_LO))

1139 return false;

1140 }

1141

1142 if (ST.hasSDWAOutModsVOPC() &&

1143 (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||

1144 TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))

1145 return false;

1146

1147 } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||

1148 TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {

1149 return false;

1150 }

1151

1152 if (ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||

1153 Opc == AMDGPU::V_FMAC_F32_e32 ||

1154 Opc == AMDGPU::V_MAC_F16_e32 ||

1155 Opc == AMDGPU::V_MAC_F32_e32))

1156 return false;

1157

1158

1159 if (TII->pseudoToMCOpcode(Opc) == -1)

1160 return false;

1161

1162 if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {

1163 if (!Src0->isReg() && !Src0->isImm())

1164 return false;

1165 }

1166

1167 if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {

1168 if (!Src1->isReg() && !Src1->isImm())

1169 return false;

1170 }

1171

1172 return true;

1173}

1174}

1175

1176MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {

1177 unsigned Opcode = MI.getOpcode();

1179

1181 if (SDWAOpcode == -1)

1183 assert(SDWAOpcode != -1);

1184

1185 const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

1186

1187

1188 MachineInstrBuilder SDWAInst =

1189 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)

1191

1192

1193 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

1194 if (Dst) {

1196 SDWAInst.add(*Dst);

1197 } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {

1199 SDWAInst.add(*Dst);

1200 } else {

1203 }

1204

1205

1206

1207 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);

1210 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))

1212 else

1214 SDWAInst.add(*Src0);

1215

1216

1217 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

1218 if (Src1) {

1221 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))

1223 else

1225 SDWAInst.add(*Src1);

1226 }

1227

1228 if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||

1229 SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||

1230 SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||

1231 SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {

1232

1233 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);

1235 SDWAInst.add(*Src2);

1236 }

1237

1238

1240 MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);

1241 if (Clamp) {

1242 SDWAInst.add(*Clamp);

1243 } else {

1245 }

1246

1247

1249 MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);

1250 if (OMod) {

1251 SDWAInst.add(*OMod);

1252 } else {

1254 }

1255 }

1256

1257

1259 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

1260

1262 SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);

1263

1265 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

1266

1267 if (Src1) {

1269 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

1270 }

1271

1272

1273 MachineInstr *Ret = SDWAInst.getInstr();

1274 TII->fixImplicitOperands(*Ret);

1275 return Ret;

1276}

1277

1278bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,

1279 const SDWAOperandsVector &SDWAOperands) {

1281

1282 MachineInstr *SDWAInst;

1283 if (TII->isSDWA(MI.getOpcode())) {

1284

1285

1286

1287 SDWAInst = MI.getMF()->CloneMachineInstr(&MI);

1288 MI.getParent()->insert(MI.getIterator(), SDWAInst);

1289 } else {

1290 SDWAInst = createSDWAVersion(MI);

1291 }

1292

1293

1294 bool Converted = false;

1295 for (auto &Operand : SDWAOperands) {

1296 LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);

1297

1298

1299

1300

1301

1302

1303

1304

1305

1306

1307 if (PotentialMatches.count(Operand->getParentInst()) == 0)

1308 Converted |= Operand->convertToSDWA(*SDWAInst, TII);

1309 }

1310

1311 if (!Converted) {

1313 return false;

1314 }

1315

1316 ConvertedInstructions.push_back(SDWAInst);

1317 for (MachineOperand &MO : SDWAInst->uses()) {

1318 if (!MO.isReg())

1319 continue;

1320

1321 MRI->clearKillFlags(MO.getReg());

1322 }

1323 LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');

1324 ++NumSDWAInstructionsPeepholed;

1325

1326 MI.eraseFromParent();

1327 return true;

1328}

1329

1330

1331

1332void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,

1333 const GCNSubtarget &ST) const {

1334 const MCInstrDesc &Desc = TII->get(MI.getOpcode());

1335 unsigned ConstantBusCount = 0;

1336 for (MachineOperand &Op : MI.explicit_uses()) {

1337 if (Op.isReg()) {

1338 if (TRI->isVGPR(*MRI, Op.getReg()))

1339 continue;

1340

1341 if (ST.hasSDWAScalar() && ConstantBusCount == 0) {

1342 ++ConstantBusCount;

1343 continue;

1344 }

1345 } else if (Op.isImm())

1346 continue;

1347

1348 unsigned I = Op.getOperandNo();

1350 if (!OpRC || TRI->isVSSuperClass(OpRC))

1351 continue;

1352

1353 Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

1354 auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),

1355 TII->get(AMDGPU::V_MOV_B32_e32), VGPR);

1356 if (Op.isImm())

1357 Copy.addImm(Op.getImm());

1358 else if (Op.isReg())

1360 Op.getSubReg());

1361 Op.ChangeToRegister(VGPR, false);

1362 }

1363}

1364

1365bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {

1367 return false;

1368

1369 return SIPeepholeSDWA().run(MF);

1370}

1371

1372bool SIPeepholeSDWA::run(MachineFunction &MF) {

1373 const GCNSubtarget &ST = MF.getSubtarget();

1374

1375 if (ST.hasSDWA())

1376 return false;

1377

1379 TRI = ST.getRegisterInfo();

1380 TII = ST.getInstrInfo();

1381

1382

1383 bool Ret = false;

1384 for (MachineBasicBlock &MBB : MF) {

1386 do {

1387

1388

1389

1390

1391 matchSDWAOperands(MBB);

1392 for (const auto &OperandPair : SDWAOperands) {

1393 const auto &Operand = OperandPair.second;

1394 MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);

1395 if (!PotentialMI)

1396 continue;

1397

1398 switch (PotentialMI->getOpcode()) {

1399 case AMDGPU::V_ADD_CO_U32_e64:

1400 case AMDGPU::V_SUB_CO_U32_e64:

1401 pseudoOpConvertToVOP2(*PotentialMI, ST);

1402 break;

1403 case AMDGPU::V_CNDMASK_B32_e64:

1404 convertVcndmaskToVOP2(*PotentialMI, ST);

1405 break;

1406 };

1407 }

1408 SDWAOperands.clear();

1409

1410

1411 matchSDWAOperands(MBB);

1412

1413 for (const auto &OperandPair : SDWAOperands) {

1414 const auto &Operand = OperandPair.second;

1415 MachineInstr *PotentialMI =

1416 Operand->potentialToConvert(TII, ST, &PotentialMatches);

1417

1418 if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII))

1419 PotentialMatches[PotentialMI].push_back(Operand.get());

1420 }

1421

1422 for (auto &PotentialPair : PotentialMatches) {

1423 MachineInstr &PotentialMI = *PotentialPair.first;

1424 convertToSDWA(PotentialMI, PotentialPair.second);

1425 }

1426

1427 PotentialMatches.clear();

1428 SDWAOperands.clear();

1429

1431

1433 Ret = true;

1434 while (!ConvertedInstructions.empty())

1435 legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);

1437 }

1438

1439 return Ret;

1440}

1441

1446

1449 return PA;

1450}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")

const TargetInstrInfo & TII

Provides AMDGPU specific target descriptions.

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

This file implements a map that provides insertion order iteration.

Promote Memory to Register

static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)

if (auto Err = PB.parsePassPipeline(MPM, Passes)) return wrap(std::move(Err)); MPM.run(*Mod, MAM)

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)

Definition SIPeepholeSDWA.cpp:297

static void copyRegOperand(MachineOperand &To, const MachineOperand &From)

Definition SIPeepholeSDWA.cpp:270

static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)

Definition SIPeepholeSDWA.cpp:289

static std::optional< SdwaSel > combineSdwaSel(SdwaSel Sel, SdwaSel OperandSel)

Combine an SDWA instruction's existing SDWA selection Sel with the SDWA selection OperandSel of its o...

Definition SIPeepholeSDWA.cpp:311

static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)

Definition SIPeepholeSDWA.cpp:282

static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII, AMDGPU::OpName SrcSelOpName, SdwaSel OpSel)

Verify that the SDWA selection operand SrcSelOpName of the SDWA instruction MI can be combined with t...

Definition SIPeepholeSDWA.cpp:502

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

LLVM_ABI void setPreservesCFG()

This function should be called by the pass if and only if it does not modify the CFG.

Represents analyses that only rely on functions' control flow.

FunctionPass class - This class is used to implement most global optimizations.

bool hasOptNone() const

Do not optimize this function (-O0).

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const

Return whether (physical) register Reg has been defined and not killed as of just before Before.

LivenessQueryResult

Possible outcome of a register liveness query to computeRegisterLiveness()

@ LQR_Dead

Register is known to be fully dead.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & setMIFlags(unsigned Flags) const

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

LLVM_ABI void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)

Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...

mop_range uses()

Returns all operands which may be register uses.

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

void setSubReg(unsigned subReg)

unsigned getSubReg() const

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setIsDead(bool Val=true)

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

bool isImm() const

isImm - Tests if this is a MO_Immediate operand.

void setIsKill(bool Val=true)

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

void setIsUndef(bool Val=true)

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

This class implements a map that also provides access to all stored values in a deterministic order.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition SIPeepholeSDWA.cpp:1442

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum) const

Given a machine instruction descriptor, returns the register class constraint for OpNum,...

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

LLVM_READONLY int getVOPe32(uint16_t Opcode)

LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)

LLVM_READONLY int getSDWAOp(uint16_t Opcode)

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ Define

Register definition.

@ Kill

The last use of a register.

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

FunctionPass * createSIPeepholeSDWALegacyPass()

Definition SIPeepholeSDWA.cpp:219

LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

char & SIPeepholeSDWALegacyID