LLVM: lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
14#include "llvm/IR/IntrinsicsAMDGPU.h"
16
17using namespace llvm;
18using namespace MIPatternMatch;
19
24 : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
25 TII(*STI.getInstrInfo()) {}
26
29 switch (MI.getOpcode()) {
30 case AMDGPU::G_FADD:
31 case AMDGPU::G_FSUB:
32 case AMDGPU::G_FMUL:
33 case AMDGPU::G_FMA:
34 case AMDGPU::G_FMAD:
35 case AMDGPU::G_FMINNUM:
36 case AMDGPU::G_FMAXNUM:
37 case AMDGPU::G_FMINNUM_IEEE:
38 case AMDGPU::G_FMAXNUM_IEEE:
39 case AMDGPU::G_FMINIMUM:
40 case AMDGPU::G_FMAXIMUM:
41 case AMDGPU::G_FSIN:
42 case AMDGPU::G_FPEXT:
43 case AMDGPU::G_INTRINSIC_TRUNC:
44 case AMDGPU::G_FPTRUNC:
45 case AMDGPU::G_FRINT:
46 case AMDGPU::G_FNEARBYINT:
47 case AMDGPU::G_INTRINSIC_ROUND:
48 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
49 case AMDGPU::G_FCANONICALIZE:
50 case AMDGPU::G_AMDGPU_RCP_IFLAG:
51 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
52 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
53 return true;
54 case AMDGPU::G_INTRINSIC: {
55 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
56 switch (IntrinsicID) {
57 case Intrinsic::amdgcn_rcp:
58 case Intrinsic::amdgcn_rcp_legacy:
59 case Intrinsic::amdgcn_sin:
60 case Intrinsic::amdgcn_fmul_legacy:
61 case Intrinsic::amdgcn_fmed3:
62 case Intrinsic::amdgcn_fma_legacy:
63 return true;
64 default:
65 return false;
66 }
67 }
68 default:
69 return false;
70 }
71}
72
73
74
75
79 return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
81}
82
83
86 if (!MI.memoperands().empty())
87 return false;
88
89 switch (MI.getOpcode()) {
90 case AMDGPU::COPY:
91 case AMDGPU::G_SELECT:
92 case AMDGPU::G_FDIV:
93 case AMDGPU::G_FREM:
94 case TargetOpcode::INLINEASM:
95 case TargetOpcode::INLINEASM_BR:
96 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
97 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
98 case AMDGPU::G_BITCAST:
99 case AMDGPU::G_ANYEXT:
100 case AMDGPU::G_BUILD_VECTOR:
101 case AMDGPU::G_BUILD_VECTOR_TRUNC:
102 case AMDGPU::G_PHI:
103 return false;
104 case AMDGPU::G_INTRINSIC:
105 case AMDGPU::G_INTRINSIC_CONVERGENT: {
106 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
107 switch (IntrinsicID) {
108 case Intrinsic::amdgcn_interp_p1:
109 case Intrinsic::amdgcn_interp_p2:
110 case Intrinsic::amdgcn_interp_mov:
111 case Intrinsic::amdgcn_interp_p1_f16:
112 case Intrinsic::amdgcn_interp_p2_f16:
113 case Intrinsic::amdgcn_div_scale:
114 return false;
115 default:
116 return true;
117 }
118 }
119 default:
120 return true;
121 }
122}
123
126
127
128
129
130
131 unsigned NumMayIncreaseSize = 0;
132 Register Dst = MI.getOperand(0).getReg();
135 return false;
136
139 return false;
140 }
141 }
142 return true;
143}
144
148}
149
154 APInt(64, 0x3fc45f306dc9c882));
155
158}
159
160
161
164 std::optional<FPValueAndVReg> FPValReg;
166 if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
167 return true;
168
170 if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
171 return true;
172 }
173 return false;
174}
175
177 switch (Opc) {
178 case AMDGPU::G_FMAXNUM:
179 return AMDGPU::G_FMINNUM;
180 case AMDGPU::G_FMINNUM:
181 return AMDGPU::G_FMAXNUM;
182 case AMDGPU::G_FMAXNUM_IEEE:
183 return AMDGPU::G_FMINNUM_IEEE;
184 case AMDGPU::G_FMINNUM_IEEE:
185 return AMDGPU::G_FMAXNUM_IEEE;
186 case AMDGPU::G_FMAXIMUM:
187 return AMDGPU::G_FMINIMUM;
188 case AMDGPU::G_FMINIMUM:
189 return AMDGPU::G_FMAXIMUM;
190 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
191 return AMDGPU::G_AMDGPU_FMIN_LEGACY;
192 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
193 return AMDGPU::G_AMDGPU_FMAX_LEGACY;
194 default:
196 }
197}
198
201 Register Src = MI.getOperand(1).getReg();
203
204
205
206
207
210 return false;
211 } else {
215 return false;
216 }
217
218 switch (MatchInfo->getOpcode()) {
219 case AMDGPU::G_FMINNUM:
220 case AMDGPU::G_FMAXNUM:
221 case AMDGPU::G_FMINNUM_IEEE:
222 case AMDGPU::G_FMAXNUM_IEEE:
223 case AMDGPU::G_FMINIMUM:
224 case AMDGPU::G_FMAXIMUM:
225 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
226 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
227
230 case AMDGPU::G_FADD:
231 case AMDGPU::G_FSUB:
232 case AMDGPU::G_FMA:
233 case AMDGPU::G_FMAD:
235 case AMDGPU::G_FMUL:
236 case AMDGPU::G_FPEXT:
237 case AMDGPU::G_INTRINSIC_TRUNC:
238 case AMDGPU::G_FPTRUNC:
239 case AMDGPU::G_FRINT:
240 case AMDGPU::G_FNEARBYINT:
241 case AMDGPU::G_INTRINSIC_ROUND:
242 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
243 case AMDGPU::G_FSIN:
244 case AMDGPU::G_FCANONICALIZE:
245 case AMDGPU::G_AMDGPU_RCP_IFLAG:
246 return true;
247 case AMDGPU::G_INTRINSIC:
248 case AMDGPU::G_INTRINSIC_CONVERGENT: {
249 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
250 switch (IntrinsicID) {
251 case Intrinsic::amdgcn_rcp:
252 case Intrinsic::amdgcn_rcp_legacy:
253 case Intrinsic::amdgcn_sin:
254 case Intrinsic::amdgcn_fmul_legacy:
255 case Intrinsic::amdgcn_fmed3:
256 return true;
257 case Intrinsic::amdgcn_fma_legacy:
259 default:
260 return false;
261 }
262 }
263 default:
264 return false;
265 }
266}
267
270
271
272
273
274
275
276
277
278
279
280
281
282
283
289 };
290
291
299 else {
302 }
303 };
304
306
307
308
309 switch (MatchInfo->getOpcode()) {
310 case AMDGPU::G_FADD:
311 case AMDGPU::G_FSUB:
312 NegateOperand(MatchInfo->getOperand(1));
313 NegateOperand(MatchInfo->getOperand(2));
314 break;
315 case AMDGPU::G_FMUL:
317 break;
318 case AMDGPU::G_FMINNUM:
319 case AMDGPU::G_FMAXNUM:
320 case AMDGPU::G_FMINNUM_IEEE:
321 case AMDGPU::G_FMAXNUM_IEEE:
322 case AMDGPU::G_FMINIMUM:
323 case AMDGPU::G_FMAXIMUM:
324 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
325 case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
326 NegateOperand(MatchInfo->getOperand(1));
327 NegateOperand(MatchInfo->getOperand(2));
330 break;
331 }
332 case AMDGPU::G_FMA:
333 case AMDGPU::G_FMAD:
335 NegateOperand(MatchInfo->getOperand(3));
336 break;
337 case AMDGPU::G_FPEXT:
338 case AMDGPU::G_INTRINSIC_TRUNC:
339 case AMDGPU::G_FRINT:
340 case AMDGPU::G_FNEARBYINT:
341 case AMDGPU::G_INTRINSIC_ROUND:
342 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
343 case AMDGPU::G_FSIN:
344 case AMDGPU::G_FCANONICALIZE:
345 case AMDGPU::G_AMDGPU_RCP_IFLAG:
346 case AMDGPU::G_FPTRUNC:
347 NegateOperand(MatchInfo->getOperand(1));
348 break;
349 case AMDGPU::G_INTRINSIC:
350 case AMDGPU::G_INTRINSIC_CONVERGENT: {
351 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
352 switch (IntrinsicID) {
353 case Intrinsic::amdgcn_rcp:
354 case Intrinsic::amdgcn_rcp_legacy:
355 case Intrinsic::amdgcn_sin:
356 NegateOperand(MatchInfo->getOperand(2));
357 break;
358 case Intrinsic::amdgcn_fmul_legacy:
360 break;
361 case Intrinsic::amdgcn_fmed3:
362 NegateOperand(MatchInfo->getOperand(2));
363 NegateOperand(MatchInfo->getOperand(3));
364 NegateOperand(MatchInfo->getOperand(4));
365 break;
366 case Intrinsic::amdgcn_fma_legacy:
368 NegateOperand(MatchInfo->getOperand(4));
369 break;
370 default:
371 llvm_unreachable("folding fneg not supported for this intrinsic");
372 }
373 break;
374 }
375 default:
376 llvm_unreachable("folding fneg not supported for this instruction");
377 }
378
379 Register Dst = MI.getOperand(0).getReg();
381
383
385 } else {
386
387
388
392
393
395
396
397 auto NextInst = ++MatchInfo->getIterator();
400 }
401
402 MI.eraseFromParent();
403}
404
405
406
410 if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
411 Register SrcReg = Def->getOperand(1).getReg();
413 }
414
415 if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
416 APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
417 bool LosesInfo = true;
419 return !LosesInfo;
420 }
421
422 return false;
423}
424
429 assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
430 Register SrcReg = MI.getOperand(1).getReg();
432 return false;
433
436}
437
442
443
447
453 MI.eraseFromParent();
454}
455
459 assert(MI.getOpcode() == TargetOpcode::G_FMUL);
462
463 Register Dst = MI.getOperand(0).getReg();
466
470 return false;
471
475
476 const auto SelectTrueVal =
478 if (!SelectTrueVal)
479 return false;
480 const auto SelectFalseVal =
482 if (!SelectFalseVal)
483 return false;
484
485 if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
486 return false;
487
488
491 return false;
492
493 int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
494 if (SelectTrueLog2Val == INT_MIN)
495 return false;
496 int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
497 if (SelectFalseLog2Val == INT_MIN)
498 return false;
499
503 IntDestTy, SelectCondReg,
506
507 Register XReg = MI.getOperand(1).getReg();
508 if (SelectTrueVal->isNegative()) {
509 auto NegX =
512 } else {
514 }
515 };
516
517 return true;
518}
unsigned const MachineRegisterInfo * MRI
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
static bool mayIgnoreSignedZero(MachineInstr &MI)
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold=4)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
static unsigned inverseMinMax(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
This contains common combine transformations that may be used in a combine pass.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getScalarSizeInBits(Type *Ty)
AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI, const GCNSubtarget &STI)
bool matchCombineFmulWithSelectToFldexp(MachineInstr &MI, MachineInstr &Sel, std::function< void(MachineIRBuilder &)> &MatchInfo) const
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2) const
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo) const
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo) const
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2) const
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
Class for arbitrary precision integers.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
MachineRegisterInfo & MRI
MachineIRBuilder & Builder
This class represents an Operation in the Expression.
Abstract class that contains various methods for clients to notify about changes.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr LLT getScalarType() const
static constexpr LLT float32()
Get a 32-bit IEEE float value.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Helper class to build MachineInstr.
MachineInstrBuilder buildFLdexp(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FLDEXP Src0, Src1.
MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
bool isInlineConstant(const APInt &Imm) const
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
This is an optimization pass for GlobalISel generic memory operations.
DWARFExpression::Operation Op
std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE