LLVM: lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

29

30#define GET_GICOMBINER_DEPS

31#include "AMDGPUGenPreLegalizeGICombiner.inc"

32#undef GET_GICOMBINER_DEPS

33

34#define DEBUG_TYPE "amdgpu-regbank-combiner"

35

36using namespace llvm;

38

39namespace {

40#define GET_GICOMBINER_TYPES

41#include "AMDGPUGenRegBankGICombiner.inc"

42#undef GET_GICOMBINER_TYPES

43

44class AMDGPURegBankCombinerImpl : public Combiner {

45protected:

46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;

52

53public:

54 AMDGPURegBankCombinerImpl(

57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,

60

61 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }

62

63 bool tryCombineAll(MachineInstr &I) const override;

64

67

// Bundle of three related opcodes: the min opcode, the max opcode and the
// median-of-three opcode of one family. getMinMaxPair() produces one of these
// from a single min or max opcode.
68 struct MinMaxMedOpc {

69 unsigned Min, Max, Med;

70 };

71

72 struct Med3MatchInfo {

73 unsigned Opc;

75 };

76

77 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;

78

79 template <class m_Cst, typename CstTy>

81 Register &Val, CstTy &K0, CstTy &K1) const;

82

83 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;

84 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;

87 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;

89

91

93 bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,

95

96private:

98 bool getIEEE() const;

99 bool getDX10Clamp() const;

103

104#define GET_GICOMBINER_CLASS_MEMBERS

105#define AMDGPUSubtarget GCNSubtarget

106#include "AMDGPUGenRegBankGICombiner.inc"

107#undef GET_GICOMBINER_CLASS_MEMBERS

108#undef AMDGPUSubtarget

109};

110

111#define GET_GICOMBINER_IMPL

112#define AMDGPUSubtarget GCNSubtarget

113#include "AMDGPUGenRegBankGICombiner.inc"

114#undef AMDGPUSubtarget

115#undef GET_GICOMBINER_IMPL

116

117AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(

120 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,

122 : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),

123 RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),

124 TII(*STI.getInstrInfo()),

125 Helper(Observer, B, false, &VT, MDT, LI),

127#include "AMDGPUGenRegBankGICombiner.inc"

129{

130}

131

132bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {

134}

135

137 if (isVgprRegBank(Reg))

138 return Reg;

139

140

141 for (MachineInstr &Use : MRI.use_instructions(Reg)) {

143 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))

144 return Def;

145 }

146

147

149 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));

150 return VgprReg;

151}

152

// Maps a min or max opcode to the {Min, Max, Med} opcode triple of its family.
// Either member of a min/max pair yields the same triple.
//
// NOTE(review): the embedded numbering jumps from 156 to 158, so the statement
// that belongs to the `default:` label is not present in this listing. As
// listed, `default:` would fall through to the signed-integer case; confirm
// against the upstream file before relying on it.
153AMDGPURegBankCombinerImpl::MinMaxMedOpc

154AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {

155 switch (Opc) {

156 default:

// Signed integer min/max -> signed med3.
158 case AMDGPU::G_SMAX:

159 case AMDGPU::G_SMIN:

160 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};

// Unsigned integer min/max -> unsigned med3.
161 case AMDGPU::G_UMAX:

162 case AMDGPU::G_UMIN:

163 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};

// Floating-point min/max; the non-IEEE and IEEE flavours keep their own
// min/max opcodes but share the same med3 opcode.
164 case AMDGPU::G_FMAXNUM:

165 case AMDGPU::G_FMINNUM:

166 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};

167 case AMDGPU::G_FMAXNUM_IEEE:

168 case AMDGPU::G_FMINNUM_IEEE:

169 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,

170 AMDGPU::G_AMDGPU_FMED3};

171 }

172}

173

174template <class m_Cst, typename CstTy>

175bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,

176 MachineRegisterInfo &MRI,

177 MinMaxMedOpc MMMOpc, Register &Val,

178 CstTy &K0, CstTy &K1) const {

179

180

181

182

183

184

190 m_Cst(K1)),

193 m_Cst(K0))));

194}

195

// Match step for turning an integer min/max pair with two constants into a
// single med3 instruction.
//
// On success fills MatchInfo with the med3 opcode, the non-constant value and
// the registers holding the two constants, and returns true. Returns false
// when the pattern does not apply.
//
// NOTE(review): embedded lines 204 and 208 are absent from this listing. The
// condition guarding the `return false` after the type query and the
// declaration of `Val` are therefore not visible here, and the template
// argument of `std::optional` has been stripped. Confirm against upstream.
196bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(

197 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {

// Only results that live in the VGPR register bank are considered.
198 Register Dst = MI.getOperand(0).getReg();

199 if (!isVgprRegBank(Dst))

200 return false;

201

202

203 LLT Ty = MRI.getType(Dst);

205 return false;

206

// Derive the min/max/med opcode family from MI's own opcode, then let
// matchMed() bind Val, K0 and K1.
207 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());

209 std::optional K0, K1;

210

211 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))

212 return false;

213

// Reject the match when K0 is greater than K1, using a signed comparison for
// the signed med3 and an unsigned comparison for the unsigned med3.
214 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))

215 return false;

216 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))

217 return false;

218

// Record the registers that define the constants (not their values) so the
// apply step can use them as operands.
219 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};

220 return true;

221}

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(

242 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {

243 Register Dst = MI.getOperand(0).getReg();

244 LLT Ty = MRI.getType(Dst);

245

246

248 return false;

249

250 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());

251

253 std::optional K0, K1;

254

255 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))

256 return false;

257

258 if (K0->Value > K1->Value)

259 return false;

260

261

262

263

264

265

266

267

269

270 if ((MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&

271 (MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {

272 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};

273 return true;

274 }

275 }

276

277 return false;

278}

279

280bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,

282

283 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());

285 std::optional K0, K1;

286

287 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))

288 return false;

289

290 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))

291 return false;

292

293

294

295

296

297 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&

300 Reg = Val;

301 return true;

302 }

303

304 return false;

305}

306

307

308

309

310

311

312

313

314

315

316bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,

318

319

323

324 if (isFCst(Src0) && !isFCst(Src1))

326 if (isFCst(Src1) && !isFCst(Src2))

328 if (isFCst(Src0) && !isFCst(Src1))

331 return false;

332

334

335 auto isOp3Zero = [&]() {

337 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)

339 return false;

340 };

341

342

343

344

346 (getIEEE() && getDX10Clamp() &&

348 Reg = Val;

349 return true;

350 }

351

352 return false;

353}

354

355void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,

357 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},

358 MI.getFlags());

359 MI.eraseFromParent();

360}

361

// Apply step for the med3 combines: replaces MI with the instruction described
// by MatchInfo.
//
// The new instruction uses opcode MatchInfo.Opc, defines MI's operand 0, and
// takes MatchInfo.Val0/Val1/Val2 as sources. Each source is first passed
// through getAsVgpr(). MI's flags are carried over to the new instruction, and
// MI itself is erased afterwards.
362void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,

363 Med3MatchInfo &MatchInfo) const {

364 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},

365 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),

366 getAsVgpr(MatchInfo.Val2)},

367 MI.getFlags());

368 MI.eraseFromParent();

369}

370

// Apply step that rebuilds a shift (G_SHL, G_LSHR or G_ASHR) so that its
// amount operand is `and(anyext(AmtReg), Mask)`, then erases the original
// shift MI.
//
// NOTE(review): embedded lines 376, 378, 382 and 389 are absent from this
// listing, so the definitions of `AmtReg` and `ExtAmtTy` and the arguments of
// the buildConstant() call that produces `Mask` are not visible here. How Ext
// is used is likewise not shown. Confirm against upstream.
371void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(

372 MachineInstr &MI, MachineInstr &Ext) const {

// The combine is only expected to be applied to the three shift opcodes.
373 unsigned ShOpc = MI.getOpcode();

374 assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||

375 ShOpc == AMDGPU::G_ASHR);

377

// Operand 0 is the shift result, operand 1 the value being shifted.
379 Register ShDst = MI.getOperand(0).getReg();

380 Register ShSrc = MI.getOperand(1).getReg();

381

383 LLT AmtTy = MRI.getType(AmtReg);

384

// Remember the amount's register bank; the new registers get the same bank.
385 auto &RB = *MRI.getRegBank(AmtReg);

386

// Build anyext + and-with-mask, then re-emit the shift with the same opcode,
// destination and source but the new amount.
387 auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);

388 auto Mask = B.buildConstant(

390 auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);

391 B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});

392

// Assign the bank explicitly to each newly created register.
393 MRI.setRegBank(NewExt.getReg(0), RB);

394 MRI.setRegBank(Mask.getReg(0), RB);

395 MRI.setRegBank(And.getReg(0), RB);

396 MI.eraseFromParent();

397}

398

399bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {

401 MachineInstr *Load, *SextLoad;

402 const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;

403 const int64_t CleanHi16 = 0x000000000000FFFF;

404

405

410

411 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {

412 const MachineMemOperand *MMO = *Load->memoperands_begin();

414 if (LoadSize == 8)

415 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);

416 if (LoadSize == 16)

417 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);

418 return false;

419 }

420

422 Load, MRI,

424 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)

425 return false;

426

429 return false;

430

431 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);

432 }

433

434 return false;

435 }

436

437

442

443 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {

444 const MachineMemOperand *MMO = *Load->memoperands_begin();

446 if (LoadSize == 8)

447 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);

448 if (LoadSize == 16)

449 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);

450 return false;

451 }

452

454 Load, MRI,

456 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)

457 return false;

460 return false;

461

462 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);

463 }

464

465 return false;

466 }

467

468 return false;

469}

470

471bool AMDGPURegBankCombinerImpl::applyD16Load(

472 unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,

473 Register SrcReg32ToOverwriteD16) const {

478 return true;

479}

480

// Returns the SIModeRegisterDefaults reported by the function info attached to
// MF. getIEEE() and getDX10Clamp() read individual fields from this value.
//
// NOTE(review): `getInfo()` appears without a template argument in this
// listing; it was presumably stripped together with the other angle-bracket
// text. Confirm against upstream.
481SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {

482 return MF.getInfo()->getMode();

483}

484

// Returns the IEEE field of the current function's mode (see getMode()).
485bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }

486

// Returns the DX10Clamp field of the current function's mode (see getMode()).
487bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {

488 return getMode().DX10Clamp;

489}

490

// Returns true iff MI's opcode is G_FMINNUM_IEEE.
491bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {

492 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;

493}

494

// Returns true iff MI is a G_FCONSTANT. MI is dereferenced unconditionally, so
// it must not be null.
495bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {

496 return MI->getOpcode() == AMDGPU::G_FCONSTANT;

497}

498

499bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,

500 MachineInstr *K1) const {

501 if (isFCst(K0) && isFCst(K1)) {

506 }

507 return false;

508}

509

510

511

512

// MachineFunctionPass wrapper around AMDGPURegBankCombinerImpl.
// runOnMachineFunction() constructs an Impl for the function and returns the
// result of its combineMachineInstrs().
513class AMDGPURegBankCombiner : public MachineFunctionPass {

514public:

// Pass identification; passed to the MachineFunctionPass base constructor.
515 static char ID;

516

// IsOptNone is stored in the member of the same name; it defaults to false.
517 AMDGPURegBankCombiner(bool IsOptNone = false);

518

519 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }

520

521 bool runOnMachineFunction(MachineFunction &MF) override;

522

523 void getAnalysisUsage(AnalysisUsage &AU) const override;

524

525private:

// When true, getAnalysisUsage() skips one required/preserved analysis pair and
// runOnMachineFunction() uses nullptr in place of the dominator tree.
526 bool IsOptNone;

// Rule configuration; the constructor calls parseCommandLineOption() on it and
// it is handed to the Impl on each run.
527 AMDGPURegBankCombinerImplRuleConfig RuleConfig;

528};

529}

530

531void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

535 AU.addRequired();

536 AU.addPreserved();

537 if (!IsOptNone) {

538 AU.addRequired();

539 AU.addPreserved();

540 }

542}

543

544AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)

545 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {

546 if (!RuleConfig.parseCommandLineOption())

548}

549

550bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {

552 return false;

553 auto *TPC = &getAnalysis();

555 bool EnableOpt =

557

560 &getAnalysis().get(MF);

561

562 const auto *LI = ST.getLegalizerInfo();

564 IsOptNone ? nullptr

565 : &getAnalysis().getDomTree();

566

567 CombinerInfo CInfo( false, true,

568 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

569

570 CInfo.MaxIterations = 1;

572

573

574 CInfo.EnableFullDCE = false;

575 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *VT, nullptr,

576 RuleConfig, ST, MDT, LI);

577 return Impl.combineMachineInstrs();

578}

579

580char AMDGPURegBankCombiner::ID = 0;

582 "Combine AMDGPU machine instrs after regbankselect",

583 false, false)

587 "Combine AMDGPU machine instrs after regbankselect", false,

589

591 return new AMDGPURegBankCombiner(IsOptNone);

592}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

#define GET_GICOMBINER_CONSTRUCTOR_INITS

const TargetInstrInfo & TII

This file declares the targeting of the Machinelegalizer class for AMDGPU.

Provides AMDGPU specific target descriptions.

This file declares the targeting of the RegisterBankInfo class for AMDGPU.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This contains common combine transformations that may be used in a combine pass, or by the target else...

Option class for Targets to specify which operations are combined how and when.

This contains the base class for all Combiners generated by TableGen.

AMD GCN specific subclass of TargetSubtarget.

Provides analysis for querying information about KnownBits during GISel passes.

Contains matchers for matching SSA Machine Instructions.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static StringRef getName(Value *V)

static bool isClampZeroToOne(SDValue A, SDValue B)

Target-Independent Code Generator Pass Configuration Options pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

LLVM_ABI bool isExactlyValue(const APFloat &V) const

We don't rely on operator== working on double values, as it returns true for things that are clearly ...

FunctionPass class - This class is used to implement most global optimizations.

To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...

constexpr unsigned getScalarSizeInBits() const

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

TypeSize getValue() const

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineFunctionProperties & getProperties() const

Get the function properties.

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

mmo_iterator memoperands_begin() const

Access to memory operands of the instruction.

ArrayRef< MachineMemOperand * > memoperands() const

Access to memory operands of the instruction.

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

LocationSize getSizeInBits() const

Return the size in bits of the memory reference.

Register getReg() const

getReg - Returns the register number.

const ConstantFP * getFPImm() const

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Holds all the information related to register banks.

const RegisterBank & getRegBank(unsigned ID)

Get the register bank identified by ID.

unsigned getID() const

Get the identifier of this register bank.

Wrapper class representing virtual and physical registers.

CodeGenOptLevel getOptLevel() const

Returns the optimization level: None, Less, Default, or Aggressive.

Target-Independent Code Generator Pass Configuration Options.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

operand_type_match m_Reg()

SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)

Matches a constant equal to RequestedValue.

UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)

BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)

BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)

bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)

BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)

Or< Preds... > m_any_of(Preds &&... preds)

BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)

UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)

bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)

NodeAddr< DefNode * > Def

NodeAddr< UseNode * > Use

This is an optimization pass for GlobalISel generic memory operations.

FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)

Definition AMDGPURegBankCombiner.cpp:590

LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)

Find the def instruction for Reg, folding away any trivial copies.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)

Modify analysis usage so it preserves passes required for the SelectionDAG fallback.

@ And

Bitwise or logical AND of integers.

LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)

Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...

bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)

Returns true if Val can be assumed to never be a signaling NaN.

constexpr T maskTrailingOnes(unsigned N)

Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

@ SinglePass

Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...