LLVM: lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp Source File

//===- AMDGPUCombinerHelper.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}

LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

// Check if all of the uses of a value can fold a source modifier, given a
// cost threshold on the number of uses that may grow to a VOP3 encoding.
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier if it exists.
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}
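The three bit patterns above are the IEEE half-, single-, and double-precision encodings of 1/(2*pi), which some subtargets accept as an inline immediate. A minimal standalone sketch, not part of this file and assuming C++20 for std::bit_cast, that decodes the single-precision pattern:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // Decode the f32 pattern matched by isInv2Pi; prints ~0.15915494,
  // i.e. 1/(2*pi).
  std::printf("%.8f\n", std::bit_cast<float>(std::uint32_t{0x3e22f983}));
}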

// A positive zero and (on subtargets with the inline immediate) 1/(2*pi)
// are free as inline constants, but their negated forms are not.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // Transform:
  // %A = inst %Op1, %Op2, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one non-debug use, i.e. the fneg)
  // %B = inst (maybe fneg %Op1), (maybe fneg %Op2), ...
  //
  // (if %A has multiple non-debug uses)
  // %B = inst (maybe fneg %Op1), (maybe fneg %Op2), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate appropriate operands so that the resulting value of MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // MatchInfoDst has multiple uses: rename its def to a fresh register that
    // holds the negated value, point Dst's uses at it, and rebuild the
    // original (non-negated) value for the remaining uses with an fneg.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegWith(MRI, MatchInfoDst, NegatedMatchInfo);

    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for other uses of old MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}
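As a scalar model of this rewrite (illustrative only; the real transform edits gMIR in place, and the helper names below are hypothetical), folding the fneg through an add means negating both operands. The result matches -(a + b) except for signed zeros, which is why matchFoldableFneg requires mayIgnoreSignedZero for the add/sub/fma cases:

// Illustrative sketch, not part of this file.
float negOfAdd(float a, float b) { return -(a + b); }
float foldedNegAdd(float a, float b) { return (-a) + (-b); }
// The two differ only for signed zeros: with a = 0.0f and b = -0.0f,
// negOfAdd returns -0.0f while foldedNegAdd returns +0.0f.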

static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}

bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) &&
         isFPExtFromF16OrConst(MRI, Src1) && isFPExtFromF16OrConst(MRI, Src2);
}

void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // We expect fptrunc (fpext x) to fold out, and to constant fold any
  // constant sources.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);

  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}
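The four instructions built above form the standard median-of-three min/max network. A scalar sketch (illustrative; med3 is a hypothetical name, and the NaN semantics of the IEEE opcodes are ignored here):

#include <algorithm>

float med3(float a, float b, float c) {
  const float Lo = std::min(a, b);      // A1
  const float Hi = std::max(a, b);      // B1
  return std::min(Hi, std::max(Lo, c)); // min(B1, C1)
}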

bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());

  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}
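The combine rewrites a multiply by a select between two power-of-two constants into an ldexp of the selected exponents (negating x once up front when both constants are negative). A scalar model with example constants (illustrative only; function names are hypothetical):

#include <cmath>

// fmul x, (select c, 8.0f, 0.5f) has the same value as
// ldexp x, (select c, 3, -1), since the exact log2 of |8.0| is 3 and
// of |0.5| is -1.
float mulBySelect(bool c, float x) { return x * (c ? 8.0f : 0.5f); }
float asLdexp(bool c, float x) { return std::ldexp(x, c ? 3 : -1); }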

bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
  std::optional<ValueAndVReg> Res =
      getIConstantVRegValWithLookThrough(Reg, MRI);
  if (!Res)
    return false;

  const uint64_t Val = Res->Value.getZExtValue();
  unsigned MaskIdx = 0;
  unsigned MaskLen = 0;
  if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
    return false;

  // The run of ones must fully cover either the low or the high 32 bits.
  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
}
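Concretely, the predicate accepts 64-bit constants whose single run of ones is at least 32 bits long and flush against bit 0 or bit 63, so the mask fully covers one 32-bit half. A small illustration reusing the same llvm::isShiftedMask_64 helper (hypothetical wrapper, not part of this file):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

static bool is32BitMask(uint64_t Val) {
  unsigned MaskIdx = 0, MaskLen = 0;
  return llvm::isShiftedMask_64(Val, MaskIdx, MaskLen) && MaskLen >= 32 &&
         (MaskIdx == 0 || MaskIdx == 64 - MaskLen);
}
// is32BitMask(0x00000000FFFFFFFF) -> true  (covers lo32)
// is32BitMask(0xFFFFFFFF00000000) -> true  (covers hi32)
// is32BitMask(0x0000FFFFFFFF0000) -> false (straddles both halves)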
