LLVM: lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp Source File

//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}

LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \p returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases. If there are
  // multiple users but for each one will necessitate using VOP3, there will
  // be a code size increase. Try to avoid increasing code size unless we know
  // it will save on the instruction count.
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}
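
// Note: the three bit patterns above are the IEEE half, single and double
// encodings of 1 / (2 * pi) ~= 0.15915494. On gfx8+ this value is available
// as an extra inline immediate (see the hasInv2PiInlineImm() check below),
// which is why its negated form is treated as costlier.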

// 0 and 1.0 / (0.5 * pi) do not have inline immediates, so there is an
// additional cost to negate them.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(
        *MatchInfo, MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate appropriate operands so that resulting value of MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice
    // versa, but replaceRegWith will replace defs as well. It is easier to
    // replace one def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of old MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}
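
// Illustrative example of the fneg fold above (schematic generic MIR; the
// register names are invented for the sketch):
//
//   %min:_(s32) = G_FMINNUM %x, %y
//   %r:_(s32)   = G_FNEG %min
//
// is rewritten into
//
//   %nx:_(s32) = G_FNEG %x
//   %ny:_(s32) = G_FNEG %y
//   %r:_(s32)  = G_FMAXNUM %nx, %ny
//
// where the new G_FNEGs are expected to cancel with existing negates or be
// absorbed as source modifiers during instruction selection.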

// TODO: Should return converted value / extension source and avoid
// introducing intermediate fptruncs in the apply function.
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}

bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) &&
         isFPExtFromF16OrConst(MRI, Src1) && isFPExtFromF16OrConst(MRI, Src2);
}

void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
  // sources.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);

  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}
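
// The expansion above relies on the median-of-three identity
//   med3(a, b, c) == min(max(a, b), max(min(a, b), c))
// so the promoted f32 fmed3 can be re-expanded as IEEE min/max once all three
// sources are known to be exactly representable in f16.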

bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());

  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  if (ScalarDestTy == LLT::float32() &&
      TII.isInlineConstant(SelectTrueVal->bitcastToAPInt()) &&
      TII.isInlineConstant(SelectFalseVal->bitcastToAPInt()))
    return false;

  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto SelectLog2 = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MI.getFlags());
      Builder.buildFLdexp(Dst, NegX, SelectLog2, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, SelectLog2, MI.getFlags());
    }
  };

  return true;
}
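
// Illustrative example of the fmul-with-select combine (schematic; constants
// are shown inline for brevity, in real generic MIR they are G_FCONSTANT /
// G_CONSTANT defs, and the register names are invented):
//
//   %k:_(s32) = G_SELECT %cond:_(s1), 2.0, 0.5
//   %r:_(s32) = G_FMUL %x:_(s32), %k
//
// becomes
//
//   %e:_(s32) = G_SELECT %cond, 1, -1        ; exact log2 of |2.0| and |0.5|
//   %r:_(s32) = G_FLDEXP %x, %e
//
// i.e. x * 2^n is rebuilt as ldexp(x, n); when both selected constants are
// negative, %x is negated first.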
