LLVM: lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

33

34#define DEBUG_TYPE "amdgpu-regbanklegalize"

35

36using namespace llvm;

37using namespace AMDGPU;

39

40namespace {

41

42

43template

45m_GAMDGPUReadAnyLane(const SrcTy &Src) {

47}

48

50public:

51 static char ID;

52

53public:

55

57

58 StringRef getPassName() const override {

59 return "AMDGPU Register Bank Legalize";

60 }

61

62 void getAnalysisUsage(AnalysisUsage &AU) const override {

67 }

68

69

70

73 }

74};

75

76}

77

79 "AMDGPU Register Bank Legalize", false, false)

84 "AMDGPU Register Bank Legalize", false, false)

85

86char AMDGPURegBankLegalize::ID = 0;

87

89

91 return new AMDGPURegBankLegalize();

92}

93

96 static std::mutex GlobalMutex;

98 CacheForRuleSet;

99 std::lock_guardstd::mutex Lock(GlobalMutex);

100 auto [It, Inserted] = CacheForRuleSet.try_emplace(ST.getGeneration());

101 if (Inserted)

102 It->second = std::make_unique(ST, MRI);

103 else

104 It->second->refreshRefs(ST, MRI);

105 return *It->second;

106}

107

115

120

121public:

124 : B(B), MRI(*B.getMRI()), TRI(TRI),

125 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),

126 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),

127 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {};

128

130 std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode);

134

138};

139

141 const RegisterBank *RB = MRI.getRegBankOrNull(Reg);

142 if (RB && RB->getID() == AMDGPU::VCCRegBankID)

143 return true;

144

146 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1);

147}

148

149std::pair<MachineInstr *, Register>

156

157std::pair<GUnmerge *, int>

159 MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);

160 if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)

161 return {nullptr, -1};

162

165 return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};

166

167 return {nullptr, -1};

168}

169

171

173 if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))

174 return RALSrc;

175

176

177

178

181 return RALSrc;

182 }

183

184

185

186

189 return RALSrc;

190 }

191

192

193

194

195

198 unsigned NumElts = Merge->getNumSources();

200 if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)

201 return {};

202

203

204 for (unsigned i = 1; i < NumElts; ++i) {

206 if (UnmergeI != Unmerge || (unsigned)IdxI != i)

207 return {};

208 }

209 return Unmerge->getSourceReg();

210 }

211

212

213

214

216 if (!UnMerge)

217 return {};

218

219 int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);

// Bail out when Merge is null (it must not be dereferenced) or when its
// source count differs from the number of UnMerge defs.
221 if (!Merge || UnMerge->getNumDefs() != Merge->getNumSources())

222 return {};

223

225 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))

226 return {};

227

228 auto [RALEl, RALElSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);

229 if (RALEl)

230 return RALElSrc;

231

232 return {};

233}

234

237 if (Dst.isVirtual())

238 MRI.replaceRegWith(Dst, Src);

239 else

240 B.buildCopy(Dst, Src);

241}

242

245 Register Dst = Copy.getOperand(0).getReg();

246 Register Src = Copy.getOperand(1).getReg();

247

248

249 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)

250 : !TRI.isVGPR(MRI, Dst))

251 return false;

252

253

254 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))

255 return false;

256

259 if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)

261

263 if (!RALSrc)

264 return false;

265

266 B.setInstr(Copy);

267 if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {

268

269

270

271

273 } else {

274

275

276

277

278

279

280 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);

282 }

283

285 return true;

286}

287

290 return;

291

292 Register Dst = MI.getOperand(0).getReg();

293 Register Src = MI.getOperand(1).getReg();

294

295 if (!Dst.isVirtual() || !Src.isVirtual())

296 return;

297

298

299

300

301

302

303

304

305 if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {

306 auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC);

307 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&

308 "sgpr S1 must be result of G_TRUNC of sgpr S32");

309

310 B.setInstr(MI);

311

312 auto One = B.buildConstant({SgprRB, S32}, 1);

313 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);

314 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});

316 }

317}

318

320

321

322

323

324 Register Dst = MI.getOperand(0).getReg();

325 Register Src = MI.getOperand(1).getReg();

326 if (MRI.getType(Src) != S1)

327 return;

328

329 auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC);

330 if (!Trunc)

331 return;

332

333 LLT DstTy = MRI.getType(Dst);

334 LLT TruncSrcTy = MRI.getType(TruncSrc);

335

336 if (DstTy == TruncSrcTy) {

337 MRI.replaceRegWith(Dst, TruncSrc);

339 return;

340 }

341

342 B.setInstr(MI);

343

344 if (DstTy == S32 && TruncSrcTy == S64) {

345 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);

346 MRI.replaceRegWith(Dst, Unmerge.getReg(0));

348 return;

349 }

350

351 if (DstTy == S64 && TruncSrcTy == S32) {

352 B.buildMergeLikeInstr(MI.getOperand(0).getReg(),

353 {TruncSrc, B.buildUndef({SgprRB, S32})});

355 return;

356 }

357

358 if (DstTy == S32 && TruncSrcTy == S16) {

359 B.buildAnyExt(Dst, TruncSrc);

361 return;

362 }

363

364 if (DstTy == S16 && TruncSrcTy == S32) {

365 B.buildTrunc(Dst, TruncSrc);

367 return;

368 }

369

371}

372

373

376 for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {

379 continue;

380

382 if (RB && RB->getID() == AMDGPU::SGPRRegBankID) {

383 LLVM_DEBUG(dbgs() << "Warning: detected sgpr S1 register in: ";

385 return Reg;

386 }

387 }

388

389 return {};

390}

391

392bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {

394 return false;

395

396

397 const TargetPassConfig &TPC = getAnalysis();

398 GISelCSEAnalysisWrapper &Wrapper =

399 getAnalysis().getCSEWrapper();

401 GISelObserverWrapper Observer;

403

404 CSEMIRBuilder B(MF);

405 B.setCSEInfo(&CSEInfo);

406 B.setChangeObserver(Observer);

407

408 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);

409 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);

410

411 const GCNSubtarget &ST = MF.getSubtarget();

413 const RegisterBankInfo &RBI = *ST.getRegBankInfo();

415 getAnalysis().getUniformityInfo();

416

417

418 const RegBankLegalizeRules &RBLRules = getRules(ST, MRI);

419

420

421 RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules);

422

424

425 for (MachineBasicBlock &MBB : MF) {

426 for (MachineInstr &MI : MBB) {

428 }

429 }

430

431 for (MachineInstr *MI : AllInst) {

// Only pre-ISel (generic) opcodes such as G_PHI or G_BITCAST are handled
// below; skip every other instruction.
432 if (!MI->isPreISelOpcode())

433 continue;

434

435 unsigned Opc = MI->getOpcode();

436

437 if (Opc == AMDGPU::G_PHI) {

438 if (!RBLHelper.applyMappingPHI(*MI))

439 return false;

440 continue;

441 }

442

443

444

445 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||

446 Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {

447 RBLHelper.applyMappingTrivial(*MI);

448 continue;

449 }

450

451

452 if (Opc == G_FREEZE &&

454 RBLHelper.applyMappingTrivial(*MI);

455 continue;

456 }

457

458 if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||

459 Opc == AMDGPU::G_IMPLICIT_DEF)) {

460 Register Dst = MI->getOperand(0).getReg();

461

463 assert(MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);

464 continue;

465 }

466

467

468 }

469

470 if (!RBLHelper.findRuleAndApplyMapping(*MI))

471 return false;

472 }

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497 AMDGPURegBankLegalizeCombiner Combiner(B, *ST.getRegisterInfo(), RBI);

498

499 for (MachineBasicBlock &MBB : MF) {

501 if (MI.getOpcode() == AMDGPU::COPY) {

502 Combiner.tryCombineCopy(MI);

503 continue;

504 }

505 if (MI.getOpcode() == AMDGPU::G_ANYEXT) {

506 Combiner.tryCombineS1AnyExt(MI);

507 continue;

508 }

509 }

510 }

511

513 "Registers with sgpr reg bank and S1 LLT are not legal after "

514 "AMDGPURegBankLegalize. Should lower to sgpr S32");

515

516 return true;

517}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

amdgpu aa AMDGPU Address space based Alias Analysis Wrapper

static Register getAnySgprS1(const MachineRegisterInfo &MRI)

Definition AMDGPURegBankLegalize.cpp:374

const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)

Definition AMDGPURegBankLegalize.cpp:94

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Provides analysis for continuously CSEing during GISel passes.

This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.

AMD GCN specific subclass of TargetSubtarget.

Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

Contains matchers for matching SSA Machine Instructions.

Machine IR instance of the generic uniformity analysis.

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static bool isValid(const char C)

Returns true if C is a valid mangled character: <0-9a-zA-Z_>.

Target-Independent Code Generator Pass Configuration Options pass.

std::pair< GUnmerge *, int > tryMatchRALFromUnmerge(Register Src)

Definition AMDGPURegBankLegalize.cpp:158

void replaceRegWithOrBuildCopy(Register Dst, Register Src)

Definition AMDGPURegBankLegalize.cpp:235

AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)

Definition AMDGPURegBankLegalize.cpp:122

bool isLaneMask(Register Reg)

Definition AMDGPURegBankLegalize.cpp:140

void tryCombineS1AnyExt(MachineInstr &MI)

Definition AMDGPURegBankLegalize.cpp:319

std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)

Definition AMDGPURegBankLegalize.cpp:150

Register getReadAnyLaneSrc(Register Src)

Definition AMDGPURegBankLegalize.cpp:170

void tryCombineCopy(MachineInstr &MI)

Definition AMDGPURegBankLegalize.cpp:288

bool tryEliminateReadAnyLane(MachineInstr &Copy)

Definition AMDGPURegBankLegalize.cpp:243

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

FunctionPass class - This class is used to implement most global optimizations.

The actual analysis pass wrapper.

void addObserver(GISelChangeObserver *O)

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

Properties which a MachineFunction may have at a given point in time.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineFunctionProperties & getProperties() const

Get the function properties.

Helper class to build MachineInstr.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Legacy analysis pass which computes a MachineUniformityInfo.

Holds all the information related to register banks.

This class implements the register bank concept.

unsigned getID() const

Get the identifier of this register bank.

Wrapper class representing virtual and physical registers.

static Register index2VirtReg(unsigned Index)

Convert a 0-based index to a virtual register number.

void push_back(const T &Elt)

StringRef - Represent a constant reference to a string, i.e.

Target-Independent Code Generator Pass Configuration Options.

virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const

Returns the CSEConfig object to use for the current optimization level.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

operand_type_match m_Reg()

bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)

UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)

UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)

This is an optimization pass for GlobalISel generic memory operations.

GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)

See if Reg is defined by an single def instruction that is Opcode.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

FunctionPass * createAMDGPURegBankLegalizePass()

Definition AMDGPURegBankLegalize.cpp:90

LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)

char & AMDGPURegBankLegalizeID

Definition AMDGPURegBankLegalize.cpp:88