LLVM: lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

38

39using namespace llvm;

40

41#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"

42

43namespace {

44

46 "Number of MFMA instructions rewritten to use AGPR form");

47

48

50

// Implementation object for the AGPR-copy/MFMA rewrite. Both pass entry
// points in this file construct one as Impl(MF, VRM, LRM, LIS, LSS,
// RegClassInfo) and then call run(MF).
// NOTE(review): this listing is incomplete -- the data members, the
// constructor signature and several member declarations are not visible.
51class AMDGPURewriteAGPRCopyMFMAImpl {

62

65

66public:

// Tail of the constructor's member-initializer list: caches the subtarget's
// register info and the function's MachineRegisterInfo, and stores the
// analysis handles that were passed in.
72 TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),

73 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}

74

// Predicate used throughout the pass to decide whether an instruction may be
// rewritten. Its body (source line 76) is missing from this listing.
75 bool isRewriteCandidate(const MachineInstr &MI) const {

77 }

78

79

80

81

82

83

// Fragment of a helper whose signature is missing from this listing
// (presumably getAssignedAGPR, which run() calls -- TODO confirm).
// A null PhysReg is handled first (the action taken is on a missing line).
86 if (!PhysReg)

88

89

90

// Report PhysReg only when AssignedRC is an AGPR class; otherwise return a
// default-constructed (null) MCRegister.
92 return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();

93 }

94

97

98

99

100

101

102

103

104

105

106

107

// Declaration only; parameter list is missing from this listing.
108 bool recomputeRegClassExceptRewritable(

111

// Both helpers take a virtual register and the AGPR it is assigned to, and
// return true when they changed something.
112 bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;

113 bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;

114

115

116

// Declaration continues on a line that is missing from this listing.
117 void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,

119

120

121

122

// Tail of another declaration (collectSpillIndexUses is defined later in the
// file with a matching Map parameter).
124 SpillReferenceMap &Map) const;

125

126

127

128 void eliminateSpillsOfReassignedVGPRs() const;

129

131};

132

// Worklist walk over registers connected through rewrite-candidate MFMAs.
// Returns false as soon as a visited register or one of its users prevents a
// register-class change, and true once the worklist is empty. Registers
// discovered on the way are added to RewriteRegs.
// NOTE(review): this listing is incomplete; the parameter list, the worklist
// setup and the computation of OldRC/NewRC are on lines that are not visible.
133bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(

137

138

139 while (!Worklist.empty()) {

142

143

// The candidate class is the same as the current one: nothing to gain.
145 if (OldRC == NewRC)

146 return false;

147

148

150

152

153

154

155

156

// Rewrite candidates are not constrained through their current operand
// classes; the AGPR form of the instruction (AGPRDesc) is inspected instead.
157 if (isRewriteCandidate(*MI)) {

161 TII.getRegClass(AGPRDesc, MO.getOperandNo());

// Fixed: the check was inverted. The AGPR-form operand class has to contain
// AGPRs for the rewrite to be possible, so bail out when it does NOT.
162 if (!TRI.hasAGPRs(NewRC))

163 return false;

164

166 TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);

168 TII.getNamedOperand(*MI, AMDGPU::OpName::src2);

// Fixed: the check was inverted. Only register operands carry a register to
// follow, so skip vdst/src2 when it is NOT a register.
170 if (!Op->isReg())

171 continue;

172

// Gives up on the whole chain; the guarding condition is on a missing line.
175 return false;

176

// insert() is true only for a register that was not yet in RewriteRegs, so
// each other register is handled once (the action is on a missing line).
177 if (OtherReg != Reg && RewriteRegs.insert(OtherReg))

179 }

180

184 dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"

187

188 if (Src2->isReg()) {

192 }

193

194 dbgs() << "]: " << MI;

195 });

196

198 }

199

// Candidate handled; do not apply the generic operand constraint below.
200 continue;

201 }

202

// Any other user narrows NewRC by the constraint of the operand it uses.
203 unsigned OpNo = &MO - &MI->getOperand(0);

204 NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);

// No common class, or we are back at the original class: cannot reassign.
// NOTE(review): NewRC may be null here yet is passed to getRegClassName() in
// the debug output below -- confirm that is safe.
205 if (!NewRC || NewRC == OldRC) {

207 << " cannot be reassigned to "

208 << TRI.getRegClassName(NewRC) << ": " << *MI);

209 return false;

210 }

211 }

212 }

213

214 return true;

215}

216

// Tries to move a chain of MFMAs, starting from MFMAHintReg, to the AGPR form.
// Returns true when the registers were reassigned and the instructions were
// rewritten; returns false (after restoring the previous assignments) when
// that was not possible.
// NOTE(review): this listing is incomplete; the parameter list and several
// statements are on lines that are not visible.
217bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(

219

220

223

224

225

// The starting register is always part of the set to reassign.
226 RewriteRegs.insert(MFMAHintReg);

227

228

229

230

231

232

233

234

235

236

// Collect every register/instruction that must change together; give up if
// any of them cannot change class.
237 if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,

238 RewriteRegs)) {

239 LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "

241 return false;

242 }

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

// Records (live interval, previous physical register) pairs so the old
// assignments can be restored if the reassignment below fails.
260 using RecoloringStack =

262 RecoloringStack TentativeReassignments;

263

264 for (Register RewriteReg : RewriteRegs) {

266 TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});

268 }

269

270 if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {

271

// Roll back: put every interval back on the register it had before.
272 for (auto [LI, OldAssign] : TentativeReassignments) {

// The statement guarded by this check is on a missing line.
273 if (VRM.hasPhys(LI->reg()))

275 LRM.assign(*LI, OldAssign);

276 }

277

278 return false;

279 }

280

281

282

// Commit: switch each rewritten register to its equivalent AGPR class.
283 for (Register InterferingReg : RewriteRegs) {

285 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));

286 MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);

287 }

288

// Commit: swap each candidate's opcode descriptor and bump the statistic.
289 for (MachineInstr *RewriteCandidate : RewriteCandidates) {

290 int NewMFMAOp =

292 RewriteCandidate->setDesc(TII.get(NewMFMAOp));

293 ++NumMFMAsRewrittenToAGPR;

294 }

295

296 return true;

297}

298

299

300

301

// Assigns an AGPR to every register in InterferingRegs, preferring
// PrefPhysReg. Returns false at the first register for which no AGPR could be
// chosen, true when all were assigned. Assignments already made before a
// failure are not undone here.
// NOTE(review): this listing is incomplete; the parameter list and several
// statements are on lines that are not visible.
302bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(

304

305

306

307

308 for (Register InterferingReg : InterferingRegs) {

311 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));

312

// NoRegister doubles as "nothing found yet" for the check further down.
313 MCPhysReg Assignable = AMDGPU::NoRegister;

// First choice: the preferred register, if it belongs to the AGPR class (the
// rest of this condition is on a missing line).
314 if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&

317

318

319

320

321

322

323

324 Assignable = PrefPhysReg;

325 } else {

// Otherwise take the first acceptable register from the class's allocation
// order (the acceptance test is on a missing line).
327 RegClassInfo.getOrder(EquivalentAGPRRegClass);

330 Assignable = Reg;

331 break;

332 }

333 }

334 }

335

336 if (!Assignable) {

339 << " to a free AGPR\n");

340 return false;

341 }

342

344 << " to " << printReg(Assignable, &TRI) << '\n');

345 LRM.assign(ReassignLI, Assignable);

346 }

347

348 return true;

349}

350

351

352

353

354

355

356

// Looks at copies associated with VReg and, for each copy source, tries to
// rewrite the MFMA(s) defining that source so they produce the value in an
// AGPR directly, using AssignedAGPR as the preferred register.
// Returns true if at least one MFMA chain was rewritten.
// NOTE(review): this listing is incomplete; the parameter list (declared in
// the class as (Register VReg, MCRegister AssignedAGPR) const) and the outer
// loop header are on lines that are not visible.
357bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(

359 bool MadeChange = false;

// Fixed: the check was inverted. Operand 1 is read as the copy source right
// below, so everything that is NOT a copy must be skipped here.
361 if (!UseMI.isCopy())

362 continue;

363

364 Register CopySrcReg = UseMI.getOperand(1).getReg();

// Skips this copy; the guarding condition is on a missing line.
366 continue;

367

368

369

370

371

372

373

374

375

376

377

378

379

380

// Every rewrite-candidate definition of the copy source is tried; operand 0 of
// that definition is the register the chain starts from.
381 for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {

382 if (isRewriteCandidate(CopySrcDefMI) &&

383 tryReassigningMFMAChain(

384 CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))

385 MadeChange = true;

386 }

387 }

388

389 return MadeChange;

390}

391

392

393

394

395

396

397

398

// Looks at copies associated with VReg and, for each copy destination, tries
// to rewrite the MFMA(s) reading that destination so they consume the value
// from an AGPR, using the destination's current physical register as hint.
// Returns true if at least one MFMA chain was rewritten.
// NOTE(review): this listing is incomplete; the parameter list (declared in
// the class as (Register VReg, MCRegister AssignedAGPR) const) and the outer
// loop header are on lines that are not visible.
399bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(

401 bool MadeChange = false;

// Fixed: the check was inverted. Operand 0 is read as the copy destination
// right below, so everything that is NOT a copy must be skipped here.
403 if (!UseMI.isCopy())

404 continue;

405

406 Register CopyDstReg = UseMI.getOperand(0).getReg();

// Skips this copy; the guarding condition is on a missing line.
408 continue;

409 for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {

// Only operands that read the copied value are of interest.
410 if (!CopyUseMO.readsReg())

411 continue;

412

414 if (isRewriteCandidate(CopyUseMI)) {

415 if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,

416 VRM.getPhys(CopyDstReg)))

417 MadeChange = true;

418 }

419 }

420 }

421

422 return MadeChange;

423}

424

// Builds a COPY immediately before SpillMI that involves VReg, in place of the
// spill instruction.
// NOTE(review): this listing is incomplete; the parameter list (declared in
// the class as starting with MachineInstr &SpillMI, int SpillFI), the branch
// condition, the remaining builder operands and the handling of SpillMI itself
// are on lines that are not visible.
425void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(

// First branch: VReg is the destination of the new COPY.
431 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)

433 } else {

// Second branch: the COPY is created without a destination here; its operands
// are added on lines that are not visible.
434 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))

437 }

438

441}

442

// Fills Map for the stack slots of StackIntervals, based on the frame-index
// operands found in instructions. A slot that is referenced by anything other
// than a VGPR spill instruction is dropped from both the needed set and Map.
// NOTE(review): this listing is incomplete; the parameter list, the loops over
// instructions/operands and the body of the VGPR-spill branch are on lines
// that are not visible.
443void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(

445

// Seed the set of frame indexes worth tracking from the given intervals.
447 for (const LiveInterval *LI : StackIntervals)

448 NeededFrameIndexes.insert(LI->reg().stackSlotIndex());

449

// Ignore operands that are not frame indexes or refer to untracked slots.
453 if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))

454 continue;

455

// VGPR spill instructions are the expected kind of reference; stop scanning
// this instruction's operands afterwards.
456 if (TII.isVGPRSpill(MI)) {

459 break;

460 }

461

462

463

464

465

// Any other kind of reference disqualifies the slot entirely.
466 NeededFrameIndexes.erase(MO.getIndex());

467 Map.erase(MO.getIndex());

468 }

469 }

470 }

471}

472

// Tries to get rid of VGPR stack-slot spills: for each eligible stack interval
// it searches the allocation order for a usable physical register, creates a
// new virtual register, replaces every spill reference of the slot with a copy
// to/from that register and assigns the register in the live register matrix.
// NOTE(review): this listing is incomplete; many statements (including how
// NumSlots, RC, AllocOrder and NewLI are obtained, and the acceptance tests in
// the loops) are on lines that are not visible.
473void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {

// Nothing to do without stack slots.
475 if (NumSlots == 0)

476 return;

477

479

481 StackIntervals.reserve(NumSlots);

482

// Gather the candidate stack intervals; only those whose register class has
// VGPRs are kept (the statement doing so is on a missing line).
483 for (auto &[Slot, LI] : LSS) {

485 continue;

486

488 if (TRI.hasVGPRs(RC))

490 }

491

493

494

// Sort comparator: larger weight first, then larger size, then lower stack
// slot index as the final tie-breaker.
495 if (A->weight() != B->weight())

496 return A->weight() > B->weight();

497

498 if (A->getSize() != B->getSize())

499 return A->getSize() > B->getSize();

500

501

502 return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();

503 });

504

505

506

507

508

509

510

511

512

513

514

515

516

// Map each slot to the spill instructions that reference it.
518 collectSpillIndexUses(StackIntervals, SpillSlotReferences);

519

521 int Slot = LI->reg().stackSlotIndex();

522 auto SpillReferences = SpillSlotReferences.find(Slot);

// Slots without an entry in the map are skipped.
523 if (SpillReferences == SpillSlotReferences.end())

524 continue;

525

527

529 << " by reassigning\n");

530

532

// Take the first physical register in allocation order that passes the
// checks (which are on missing lines), then stop searching.
533 for (MCPhysReg PhysReg : AllocOrder) {

535 continue;

536

539

541 Register NewVReg = MRI.createVirtualRegister(RC);

542

543 for (MachineInstr *SpillMI : SpillReferences->second)

544 replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);

545

546

547

548

551 LRM.assign(NewLI, PhysReg);

553 break;

554 }

555 }

556}

557

// Runs the rewrite over one machine function.
// Returns true if any copy fold rewrote an MFMA chain, false otherwise
// (including the two early exits below).
// NOTE(review): this listing is incomplete; the conditions guarding both early
// exits and the definition of VReg are on lines that are not visible.
558bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {

559

560

// Early exit; the guarding condition is on a missing line.
562 return false;

563

564

// Per the debug message: nothing to do when no AGPRs were allocated.
566 LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");

567 return false;

568 }

569

570 bool MadeChange = false;

571

// Visit every virtual register by index (VReg is presumably derived from I on
// the missing line 573 -- TODO confirm).
572 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {

// Only registers that getAssignedAGPR reports a register for are processed.
574 MCRegister AssignedAGPR = getAssignedAGPR(VReg);

575 if (!AssignedAGPR)

576 continue;

577

// Both fold directions are always attempted for the register.
578 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))

579 MadeChange = true;

580 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))

581 MadeChange = true;

582 }

583

584

585

586

// Spill cleanup is only attempted when something was rewritten.
587 if (MadeChange)

588 eliminateSpillsOfReassignedVGPRs();

589

590 return MadeChange;

591}

592

594public:

595 static char ID;

597

601 }

602

604

605 StringRef getPassName() const override {

606 return "AMDGPU Rewrite AGPR-Copy-MFMA";

607 }

608

609 void getAnalysisUsage(AnalysisUsage &AU) const override {

614

619

622 }

623};

624

625}

626

628 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)

635

636char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;

637

639 AMDGPURewriteAGPRCopyMFMALegacy::ID;

640

// Legacy pass-manager entry point: fetches the required analyses, builds the
// implementation object and returns whether it changed the function.
// NOTE(review): this listing is incomplete; the parameter list is on a line
// that is not visible.
641bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(

// Honour the standard skip logic before doing any work.
643 if (skipFunction(MF.getFunction()))

644 return false;

645

646 RegClassInfo.runOnMachineFunction(MF);

647

// NOTE(review): the template arguments of the four getAnalysis() calls below
// appear to have been stripped from this listing (the calls cannot compile
// without them) -- restore them from the upstream file.
648 auto &VRM = getAnalysis().getVRM();

649 auto &LRM = getAnalysis().getLRM();

650 auto &LIS = getAnalysis().getLIS();

651 auto &LSS = getAnalysis().getLS();

652 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);

653 return Impl.run(MF);

654}

655

665

666 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);

667 if (!Impl.run(MF))

671 .preserve()

673 .preserve()

675 .preserve();

676 return PA;

677}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

const TargetInstrInfo & TII

AMDGPU Rewrite AGPR Copy MFMA

Definition AMDGPURewriteAGPRCopyMFMA.cpp:634

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Interface definition for SIRegisterInfo.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition AMDGPURewriteAGPRCopyMFMA.cpp:657

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

void setPreservesAll()

Set by analyses that do not transform their input at all.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Represents analyses that only rely on functions' control flow.

bool hasGFX90AInsts() const

LiveInterval - This class represents the liveness of a register, or stack slot.

LiveInterval & getInterval(Register Reg)

LiveInterval & createAndComputeVirtRegInterval(Register Reg)

SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)

void unassign(const LiveInterval &VirtReg)

Unassign VirtReg from its PhysReg.

bool isPhysRegUsed(MCRegister PhysReg) const

Returns true if the given PhysReg has any live intervals assigned.

@ IK_Free

No interference, go ahead and assign.

void assign(const LiveInterval &VirtReg, MCRegister PhysReg)

Assign VirtReg to PhysReg.

InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)

Check for interference before assigning VirtReg to PhysReg.

unsigned getNumIntervals() const

Describe properties that are true of each instruction in the target description file.

Wrapper class representing physical registers. Should be passed by value.

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

bool isSpillSlotObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a spill slot.

void RemoveStackObject(int ObjectIdx)

Remove or mark dead a statically sized stack object.

bool isDeadObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a dead object.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

bool mayStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly modify memory.

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)

runOnMachineFunction - Prepare to answer questions about MF.

ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const

getOrder - Returns the preferred allocation order for RC.

Wrapper class representing virtual and physical registers.

static Register index2VirtReg(unsigned Index)

Convert a 0-based index to a virtual register number.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A SetVector that performs no allocations if smaller than a certain size.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void reserve(size_type N)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

bool contains(Register Reg) const

Return true if the specified register is included in this register class.

MCRegister getPhys(Register virtReg) const

returns the physical register mapped to the specified virtual register

bool hasPhys(Register virtReg) const

returns true if the specified virtual register is mapped to a physical register

LLVM_READONLY int getMFMASrcCVDstAGPROp(uint16_t Opcode)

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &)

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

DWARFExpression::Operation Op

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

char & AMDGPURewriteAGPRCopyMFMALegacyID

Definition AMDGPURewriteAGPRCopyMFMA.cpp:638

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.