LLVM: lib/Target/AMDGPU/SIShrinkInstructions.cpp Source File

//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
//

#include "SIShrinkInstructions.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-shrink-instructions"

STATISTIC(NumInstructionsShrunk,
          "Number of 64-bit instruction reduced to 32-bit.");
STATISTIC(NumLiteralConstantsFolded,
          "Number of literal constants folded into 32-bit instructions.");

using namespace llvm;

namespace {
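// Worker class shared by the legacy and new pass manager entry points; all of
// the shrinking logic lives in run() and the helpers below.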

class SIShrinkInstructions {
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  bool IsPostRA;

  bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
  bool shouldShrinkTrue16(MachineInstr &MI) const;
  bool isKImmOperand(const MachineOperand &Src) const;
  bool isKUImmOperand(const MachineOperand &Src) const;
  bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
  void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
  void shrinkScalarCompare(MachineInstr &MI) const;
  void shrinkMIMG(MachineInstr &MI) const;
  void shrinkMadFma(MachineInstr &MI) const;
  bool shrinkScalarLogicOp(MachineInstr &MI) const;
  bool tryReplaceDeadSDST(MachineInstr &MI) const;
  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                     Register Reg, unsigned SubReg) const;
  bool instReadsReg(const MachineInstr *MI, unsigned Reg,
                    unsigned SubReg) const;
  bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                       unsigned SubReg) const;
  TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                   unsigned I) const;
  void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
  MachineInstr *matchSwap(MachineInstr &MovT) const;

public:
  SIShrinkInstructions() = default;

  bool run(MachineFunction &MF);
};

class SIShrinkInstructionsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Shrink Instructions"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

INITIALIZE_PASS(SIShrinkInstructionsLegacy, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructionsLegacy::ID = 0;

FunctionPass *llvm::createSIShrinkInstructionsLegacyPass() {
  return new SIShrinkInstructionsLegacy();
}
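// Fold an immediate (or frame index / global address) defined by a preceding
// move into this instruction's src0 operand when the operand rules allow it.
// If src0 cannot be folded directly, commute the instruction once and retry.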

bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
                                          bool TryToCommute) const {
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0.
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    Register Reg = Src0.getReg();
    if (Reg.isVirtual()) {
      MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
          if (MovSrc.isImm()) {
            Src0.ChangeToImmediate(MovSrc.getImm());
            ConstantFolded = true;
          } else if (MovSrc.isFI()) {
            Src0.ChangeToFrameIndex(MovSrc.getIndex());
            ConstantFolded = true;
          } else if (MovSrc.isGlobal()) {
            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                            MovSrc.getTargetFlags());
            ConstantFolded = true;
          }
        }

        if (ConstantFolded) {
          if (MRI->use_nodbg_empty(Reg))
            Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      if (foldImmediates(MI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}
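// True16 instructions can only use the low-128 VGPRs; refuse to shrink when
// any register operand falls outside VGPR_32_Lo128 / VGPR_16_Lo128.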

bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
                                 "True16 Instructions post-RA");
      if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
          !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
        return false;

      if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
          !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
        return false;
    }
  }
  return true;
}
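// Classify an immediate operand as a 16-bit signed (KImm) or unsigned (KUImm)
// literal; operands that are already free inline constants do not qualify.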

bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
  return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
  return isUInt<16>(Src.getImm()) &&
         !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}

bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
                                                bool &IsUnsigned) const {
  if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}
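// \returns the opcode that a mov of a non-inline 32-bit constant can be
// rewritten to (v_not_b32, s_brev_b32 or v_bfrev_b32) so that the modified
// immediate becomes an inline constant, or 0 if no such rewrite exists.
// ModifiedImm receives the rewritten immediate.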

static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
                                         const MachineOperand &Src,
                                         int32_t &ModifiedImm, bool Scalar) {
  if (TII->isInlineConstant(Src))
    return 0;
  int32_t SrcImm = static_cast<int32_t>(Src.getImm());

  if (!Scalar) {
    // The scalar case is not handled here because S_NOT_B32 clobbers SCC,
    // which would have to be proven dead; the useful values are already
    // covered by s_movk_i32.
    ModifiedImm = ~SrcImm;
    if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
      return AMDGPU::V_NOT_B32_e32;
  }

  ModifiedImm = reverseBits(SrcImm);
  if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
    return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;

  return 0;
}
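// Copy extra (ad-hoc) implicit register operands and register masks from MI
// onto the newly created shrunk instruction.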

void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
                                                MachineInstr &MI) const {
  MachineFunction &MF = *MI.getMF();
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}
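// Rewrite SOPC compares against 16-bit literals into the shorter SOPK forms
// (s_cmpk_*), taking care of the signed/unsigned distinction for eq/ne.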

void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
  if (!ST->hasSCmpK())
    return;

  // s_cmpk_* compares a register against a 16-bit immediate, so commute first
  // to move a constant operand to the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  // After commuting, src0 must be a register.
  const MachineOperand &Src0 = MI.getOperand(0);
  if (!Src0.isReg())
    return;

  MachineOperand &Src1 = MI.getOperand(1);
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(Src1, HasUImm)) {
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
      (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
    MI.setDesc(NewDesc);
  }
}
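// Shrink NSA-encoded MIMG instructions whose address VGPRs happen to be
// contiguous back to the default (non-NSA) encoding.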

void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return;

  uint8_t NewEncoding;
  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
    NewEncoding = AMDGPU::MIMGEncGfx10Default;
    break;
  case AMDGPU::MIMGEncGfx11NSA:
    NewEncoding = AMDGPU::MIMGEncGfx11Default;
    break;
  default:
    return;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  unsigned NewAddrDwords = Info->VAddrDwords;
  const TargetRegisterClass *RC;

  if (Info->VAddrDwords == 2) {
    RC = &AMDGPU::VReg_64RegClass;
  } else if (Info->VAddrDwords == 3) {
    RC = &AMDGPU::VReg_96RegClass;
  } else if (Info->VAddrDwords == 4) {
    RC = &AMDGPU::VReg_128RegClass;
  } else if (Info->VAddrDwords == 5) {
    RC = &AMDGPU::VReg_160RegClass;
  } else if (Info->VAddrDwords == 6) {
    RC = &AMDGPU::VReg_192RegClass;
  } else if (Info->VAddrDwords == 7) {
    RC = &AMDGPU::VReg_224RegClass;
  } else if (Info->VAddrDwords == 8) {
    RC = &AMDGPU::VReg_256RegClass;
  } else if (Info->VAddrDwords == 9) {
    RC = &AMDGPU::VReg_288RegClass;
  } else if (Info->VAddrDwords == 10) {
    RC = &AMDGPU::VReg_320RegClass;
  } else if (Info->VAddrDwords == 11) {
    RC = &AMDGPU::VReg_352RegClass;
  } else if (Info->VAddrDwords == 12) {
    RC = &AMDGPU::VReg_384RegClass;
  } else {
    RC = &AMDGPU::VReg_512RegClass;
    NewAddrDwords = 16;
  }

  unsigned VgprBase = 0;
  unsigned NextVgpr = 0;
  bool IsUndef = true;
  bool IsKill = NewAddrDwords == Info->VAddrDwords;
  const unsigned NSAMaxSize = ST->getNSAMaxSize();
  const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
  const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
  for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
    unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
    unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
    assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");

    if (Idx == 0) {
      VgprBase = Vgpr;
      NextVgpr = Vgpr + Dwords;
    } else if (Vgpr == NextVgpr) {
      NextVgpr = Vgpr + Dwords;
    } else {
      return;
    }

    if (!Op.isUndef())
      IsUndef = false;
    if (!Op.isKill())
      IsKill = false;
  }

  if (VgprBase + NewAddrDwords > 256)
    return;

  // Further check for implicit tied operands - this may be present if TFE or
  // LWE is enabled.
  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
  unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
  unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
  int ToUntie = -1;
  if (TFEVal || LWEVal) {
    // TFE/LWE is enabled so we need to deal with an implicit tied operand.
    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
          MI.getOperand(i).isImplicit()) {
        // There should be at most one tied implicit operand.
        assert(
            ToUntie == -1 &&
            "found more than one tied implicit operand when expecting only 1");
        ToUntie = i;
        MI.untieRegOperand(ToUntie);
      }
    }
  }

  unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
                                             Info->VDataDwords, NewAddrDwords);
  MI.setDesc(TII->get(NewOpcode));
  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
  MI.getOperand(VAddr0Idx).setIsKill(IsKill);

  for (unsigned i = 1; i < EndVAddr; ++i)
    MI.removeOperand(VAddr0Idx + 1);

  if (ToUntie >= 0) {
    MI.tieOperands(
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
        ToUntie - (EndVAddr - 1));
  }
}
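// Rewrite VOP3 MAD/FMA with one non-inline literal operand into the
// MADAK/MADMK (FMAAK/FMAMK) forms, which embed the literal directly.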

void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
  // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand, so
  // there is no reason to try to shrink them.
  if (!ST->hasVOP3Literal())
    return;

  // There is no advantage to doing this pre-RA.
  if (!IsPostRA)
    return;

  if (TII->hasAnyModifiersSet(MI))
    return;

  const unsigned Opcode = MI.getOpcode();
  MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
  unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;

  bool Swap;

  // Detect "Dst = VSrc * VGPR + ImmX" and convert to the AK form.
  if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
    if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
      Swap = false;
    else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADAK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAAK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADAK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_t16_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16_t16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
      NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
      break;
    case AMDGPU::V_FMA_F64_e64:
      NewOpcode = AMDGPU::V_FMAAK_F64;
      break;
    }
  }

  // Detect "Dst = VSrc * ImmK + VGPR" and convert to the MK form.
  if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
    if (Src1.isImm() && !TII->isInlineConstant(Src1))
      Swap = false;
    else if (Src0.isImm() && !TII->isInlineConstant(Src0))
      Swap = true;
    else
      return;

    switch (Opcode) {
    default:
      llvm_unreachable("Unexpected mad/fma opcode!");
    case AMDGPU::V_MAD_F32_e64:
      NewOpcode = AMDGPU::V_MADMK_F32;
      break;
    case AMDGPU::V_FMA_F32_e64:
      NewOpcode = AMDGPU::V_FMAMK_F32;
      break;
    case AMDGPU::V_MAD_F16_e64:
      NewOpcode = AMDGPU::V_MADMK_F16;
      break;
    case AMDGPU::V_FMA_F16_e64:
    case AMDGPU::V_FMA_F16_gfx9_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_t16_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16_t16;
      break;
    case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
      NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
      break;
    case AMDGPU::V_FMA_F64_e64:
      NewOpcode = AMDGPU::V_FMAMK_F64;
      break;
    }
  }

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
    return;

  if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
    return;

  if (Swap) {
    // Swap Src0 and Src1 by building a new instruction.
    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
            MI.getOperand(0).getReg())
        .add(Src1)
        .add(Src0)
        .add(Src2)
        .setMIFlags(MI.getFlags());
    MI.eraseFromParent();
  } else {
    TII->removeModOperands(MI);
    MI.setDesc(TII->get(NewOpcode));
  }
}
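// Shrink scalar AND/OR/XOR with a non-inline literal: use s_bitset0/s_bitset1
// for single-bit masks, or s_andn2/s_orn2/s_xnor when the inverted immediate
// is an inline constant. For virtual registers this only records allocation
// hints; the in-place rewrite happens once source and destination match.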

bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MachineOperand *Dest = &MI.getOperand(0);
  MachineOperand *Src0 = &MI.getOperand(1);
  MachineOperand *Src1 = &MI.getOperand(2);
  MachineOperand *SrcReg = Src0;
  MachineOperand *SrcImm = Src1;

  if (!SrcImm->isImm() ||
      AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
    return false;

  uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
  uint32_t NewImm = 0;

  if (Opc == AMDGPU::S_AND_B32) {
    if (isPowerOf2_32(~Imm) &&
        MI.findRegisterDefOperand(AMDGPU::SCC, nullptr)->isDead()) {
      NewImm = countr_one(Imm);
      Opc = AMDGPU::S_BITSET0_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ANDN2_B32;
    }
  } else if (Opc == AMDGPU::S_OR_B32) {
    if (isPowerOf2_32(Imm) &&
        MI.findRegisterDefOperand(AMDGPU::SCC, nullptr)->isDead()) {
      NewImm = countr_zero(Imm);
      Opc = AMDGPU::S_BITSET1_B32;
    } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_ORN2_B32;
    }
  } else if (Opc == AMDGPU::S_XOR_B32) {
    if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
      NewImm = ~Imm;
      Opc = AMDGPU::S_XNOR_B32;
    }
  } else {
    llvm_unreachable("unexpected opcode");
  }

  if (NewImm != 0) {
    if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
      MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
      MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
      return true;
    }

    if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
      const bool IsUndef = SrcReg->isUndef();
      const bool IsKill = SrcReg->isKill();
      MI.setDesc(TII->get(Opc));
      if (Opc == AMDGPU::S_BITSET0_B32 ||
          Opc == AMDGPU::S_BITSET1_B32) {
        Src0->ChangeToImmediate(NewImm);
        // Remove the immediate and add the tied input.
        MI.getOperand(2).ChangeToRegister(Dest->getReg(), false,
                                          false, IsKill,
                                          false, IsUndef);
        MI.tieOperands(0, 2);
      } else {
        SrcImm->setImm(NewImm);
      }
    }
  }

  return false;
}
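// Subregister-aware variants of readsRegister/modifiesRegister: check whether
// any operand in the given range overlaps Reg (restricted to the lanes
// covered by SubReg for virtual registers).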

bool SIShrinkInstructions::instAccessReg(
    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
    unsigned SubReg) const {
  for (const MachineOperand &MO : R) {
    if (!MO.isReg())
      continue;

    if (Reg.isPhysical() && MO.getReg().isPhysical()) {
      if (TRI->regsOverlap(Reg, MO.getReg()))
        return true;
    } else if (MO.getReg() == Reg && Reg.isVirtual()) {
      LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
                            TRI->getSubRegIndexLaneMask(MO.getSubReg());
      if (Overlap.any())
        return true;
    }
  }
  return false;
}

bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
                                        unsigned SubReg) const {
  return instAccessReg(MI->uses(), Reg, SubReg);
}

bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
                                           unsigned SubReg) const {
  return instAccessReg(MI->defs(), Reg, SubReg);
}

TargetInstrInfo::RegSubRegPair
SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
                                        unsigned I) const {
  if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
    if (Reg.isPhysical()) {
      Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
    } else {
      Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
    }
  }
  return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

void SIShrinkInstructions::dropInstructionKeepingImpDefs(
    MachineInstr &MI) const {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().implicit_uses().size() +
                    MI.getDesc().implicit_defs().size(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &Op = MI.getOperand(i);
    if (!Op.isDef())
      continue;
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
  }

  MI.eraseFromParent();
}
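// Match the three-move pattern
//   mov t, x
//   mov x, y
//   mov y, t
// and replace the second and third moves with v_swap_b32 (or v_swap_b16 for
// 16-bit moves), dropping the first move when t has no other uses. Returns
// the instruction to continue scanning from, or nullptr if nothing matched.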

MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  Register T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  Register X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  unsigned Size = TII->getOpSize(MovT, 0);

  if (Size == 2 && X.isVirtual())
    return nullptr;

  if (!TRI->isVGPR(*MRI, X))
    return nullptr;

  const unsigned SearchLimit = 16;
  unsigned Count = 0;
  bool KilledT = false;
  for (auto Iter = std::next(MovT.getIterator()),
            E = MovT.getParent()->instr_end();
       Iter != E && Count < SearchLimit && !KilledT; ++Iter) {

    MachineInstr *MovY = &*Iter;
    KilledT = MovY->killsRegister(T, TRI);
    ++Count;
    if (MovY->isDebugInstr())
      continue;

    if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
         MovY->getOpcode() != AMDGPU::COPY) ||
        MovY->getOperand(1).getReg() != T ||
        MovY->getOperand(1).getSubReg() != Tsub)
      continue;

    Register Y = MovY->getOperand(0).getReg();
    unsigned Ysub = MovY->getOperand(0).getSubReg();

    if (!TRI->isVGPR(*MRI, Y))
      continue;

    MachineInstr *MovX = nullptr;
    for (auto I = std::next(MovT.getIterator()), IY = MovY->getIterator();
         I != IY; ++I) {
      if (I->isDebugInstr())
        continue;
      if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
          instModifiesReg(&*I, T, Tsub) ||
          (MovX && instModifiesReg(&*I, X, Xsub))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub)) {
        if (!MovX && instModifiesReg(&*I, X, Xsub)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }

      if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
        continue;

      MovX = &*I;
    }

    if (!MovX)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);

    MachineBasicBlock &MBB = *MovT.getParent();
    SmallVector<MachineInstr *, 4> Swaps;
    if (Size == 2) {
      auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                          TII->get(AMDGPU::V_SWAP_B16))
                      .addDef(X)
                      .addDef(Y)
                      .addReg(Y)
                      .addReg(X)
                      .getInstr();
      Swaps.push_back(MIB);
    } else {
      assert(Size > 0 && Size % 4 == 0);
      for (unsigned I = 0; I < Size / 4; ++I) {
        TargetInstrInfo::RegSubRegPair X1, Y1;
        X1 = getSubRegForIndex(X, Xsub, I);
        Y1 = getSubRegForIndex(Y, Ysub, I);
        auto *MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                            TII->get(AMDGPU::V_SWAP_B32))
                        .addDef(X1.Reg, 0, X1.SubReg)
                        .addDef(Y1.Reg, 0, Y1.SubReg)
                        .addReg(Y1.Reg, 0, Y1.SubReg)
                        .addReg(X1.Reg, 0, X1.SubReg)
                        .getInstr();
        Swaps.push_back(MIB);
      }
    }

    // Drop implicit EXEC.
    if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      for (MachineInstr *Swap : Swaps) {
        Swap->removeOperand(Swap->getNumExplicitOperands());
        Swap->copyImplicitOps(*MBB.getParent(), *MovX);
      }
    }
    MovX->eraseFromParent();
    dropInstructionKeepingImpDefs(*MovY);
    MachineInstr *Next = &*std::next(MovT.getIterator());

    if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
      dropInstructionKeepingImpDefs(MovT);
    } else {
      Xop.setIsKill(false);
      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I) {
        unsigned OpNo = MovT.getNumExplicitOperands() + I;
        const MachineOperand &Op = MovT.getOperand(OpNo);
        if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
          MovT.removeOperand(OpNo);
      }
    }

    return Next;
  }

  return nullptr;
}
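// If the scalar destination of a VOP3 instruction (e.g. the carry-out of a
// 64-bit-encoded add) is unused, point it at the null register so it no
// longer ties up an SGPR.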

bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
  if (!ST->hasGFX10_3Insts())
    return false;

  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (!Op)
    return false;

  Register SDstReg = Op->getReg();
  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
    return false;

  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
  return true;
}
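// Main driver: walk every instruction, apply the scalar/MIMG/mad-fma
// rewrites, and shrink eligible VOP3 instructions to their 32-bit encodings,
// folding literals and recording VCC allocation hints along the way.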

bool SIShrinkInstructions::run(MachineFunction &MF) {

  this->MF = &MF;
  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();
  IsPostRA = MF.getProperties().hasNoVRegs();

  unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is not an inline
        // immediate, try replacing it with a v_not_b32 or v_bfrev_b32 of an
        // inline immediate instead.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() && IsPostRA) {
          int32_t ModImm;
          unsigned ModOpcode =
              canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
          if (ModOpcode != 0) {
            MI.setDesc(TII->get(ModOpcode));
            Src.setImm(static_cast<int64_t>(ModImm));
            continue;
          }
        }
      }

      if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                            MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
                            MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI)) {
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Try to use S_ADDK_I32 and S_MULK_I32.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (Dest->getReg().isVirtual() && Src0->isReg()) {
          MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(*Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*.
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() && Dst.getReg().isPhysical()) {
          unsigned ModOpc;
          int32_t ModImm;
          if (isKImmOperand(Src)) {
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
                                                        true))) {
            MI.setDesc(TII->get(ModOpc));
            Src.setImm(static_cast<int64_t>(ModImm));
          }
        }

        continue;
      }

      // Shrink scalar logic operations.
      if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
          MI.getOpcode() == AMDGPU::S_OR_B32 ||
          MI.getOpcode() == AMDGPU::S_XOR_B32) {
        if (shrinkScalarLogicOp(MI))
          continue;
      }

      if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
          ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
        shrinkMIMG(MI);
        continue;
      }

      if (!TII->isVOP3(MI))
        continue;

      if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
          (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
           ST->hasFmaakFmamkF64Insts())) {
        shrinkMadFma(MI);
        continue;
      }

      // If there is no chance we will shrink it and use VCC as sdst to get
      // a 32 bit form try to replace dead sdst with NULL.
      if (TII->isVOP3(MI.getOpcode())) {
        tryReplaceDeadSDST(MI);
        if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
          continue;
        }
      }

      if (!TII->canShrink(MI, *MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, *MRI)) {
          tryReplaceDeadSDST(MI);
          continue;
        }
      }

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        MachineOperand &Op0 = MI.getOperand(0);
        if (Op0.isReg()) {
          Register DstReg = Op0.getReg();
          if (DstReg.isVirtual()) {
            // VOPC instructions can only write to the VCC register. Pre-RA we
            // cannot force that constraint, so just hint the register
            // allocator to use VCC; the post-RA run of this pass shrinks the
            // instruction if the hint was honored.
            MRI->setRegAllocationHint(DstReg, 0, VCCReg);
            continue;
          }
          if (DstReg != VCCReg)
            continue;
        }
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // The 32-bit encoding reads its condition from VCC, so hint the
        // allocator the same way as for VOPC destinations.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        Register SReg = Src2->getReg();
        if (SReg.isVirtual()) {
          MRI->setRegAllocationHint(SReg, 0, VCCReg);
          continue;
        }
        if (SReg != VCCReg)
          continue;
      }

      // Check for the bool flag output of instructions with a carry-out.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      if (SDst) {
        bool Next = false;

        if (SDst->getReg() != VCCReg) {
          if (SDst->getReg().isVirtual())
            MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
          Next = true;
        }

        // All of the instructions with carry-outs also take an SGPR input in
        // src2.
        const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                          AMDGPU::OpName::src2);
        if (Src2 && Src2->getReg() != VCCReg) {
          if (Src2->getReg().isVirtual())
            MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
          Next = true;
        }

        if (Next)
          continue;
      }

      // True16 instructions are only shrunk post-RA, once the register
      // constraints of the 16-bit encodings can be checked.
      if (AMDGPU::isTrue16Inst(MI.getOpcode()) &&
          !IsPostRA)
        continue;

      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
          !shouldShrinkTrue16(MI))
        continue;

      // We can shrink this instruction.
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MI);

      // Copy deadness from the old explicit vcc def to the new implicit def.
      if (SDst && SDst->isDead())
        Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();

      MI.eraseFromParent();
      foldImmediates(*Inst32);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}
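// Pass-manager wrappers: the legacy pass honors -O0 / optnone skipping, while
// the new pass manager entry point additionally reports preserved analyses.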

bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  return SIShrinkInstructions().run(MF);
}

PreservedAnalyses
SIShrinkInstructionsPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &) {
  if (MF.getFunction().hasOptNone() || !SIShrinkInstructions().run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
