LLVM: lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-regbank-combiner"
35
36using namespace llvm;
38
39namespace {
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenRegBankGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
// Combiner implementation class for this file. It derives from Combiner and
// takes its generated members from AMDGPUGenRegBankGICombiner.inc (see the
// GET_GICOMBINER_CLASS_MEMBERS section at the bottom of the class).
// NOTE(review): this listing drops a number of lines (constructor parameters,
// several helper prototypes and data members such as the ones initialised in
// the constructor). Confirm against the full source before relying on the
// member list shown here.
44class AMDGPURegBankCombinerImpl : public Combiner {
45protected:
// Rule configuration, held by reference; bound from the constructor argument
// of the same name.
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
52
53public:
// Constructor; only one of its parameters is visible in this listing.
54 AMDGPURegBankCombinerImpl(
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
60
// Fixed display name of this combiner implementation.
61 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
62
// Override of the Combiner hook. No hand-written definition appears in this
// file's listing; presumably provided by the generated GET_GICOMBINER_IMPL
// code -- TODO confirm.
63 bool tryCombineAll(MachineInstr &I) const override;
64
67
// Opcode triple returned by getMinMaxPair(): the min opcode, the max opcode
// and the med3 opcode that belong together.
68 struct MinMaxMedOpc {
69 unsigned Min, Max, Med;
70 };
71
// Result of a successful med3 match. Opc is the opcode to build.
// NOTE(review): the match/apply code also uses Val0/Val1/Val2 members whose
// declaration line is missing from this listing.
72 struct Med3MatchInfo {
73 unsigned Opc;
75 };
76
// Maps a min/max opcode to its MinMaxMedOpc triple.
77 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
78
// Declaration of the matchMed template (its first line is missing here);
// outputs are written through Val, K0 and K1.
79 template <class m_Cst, typename CstTy>
81 Register &Val, CstTy &K0, CstTy &K1) const;
82
// Match predicates that fill MatchInfo on success, and the shared apply step.
83 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
84 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
87 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
89
91
// First line of the applyD16Load declaration; the remaining parameters are
// cut off in this listing.
93 bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,
95
96private:
// Accessors for the IEEE and DX10Clamp fields of the mode from getMode().
98 bool getIEEE() const;
99 bool getDX10Clamp() const;
103
// Pull in the TableGen-generated class members. AMDGPUSubtarget is
// temporarily defined as GCNSubtarget for the duration of the include.
104#define GET_GICOMBINER_CLASS_MEMBERS
105#define AMDGPUSubtarget GCNSubtarget
106#include "AMDGPUGenRegBankGICombiner.inc"
107#undef GET_GICOMBINER_CLASS_MEMBERS
108#undef AMDGPUSubtarget
109};
110
111#define GET_GICOMBINER_IMPL
112#define AMDGPUSubtarget GCNSubtarget
113#include "AMDGPUGenRegBankGICombiner.inc"
114#undef AMDGPUSubtarget
115#undef GET_GICOMBINER_IMPL
116
// Constructor. Forwards MF, CInfo, TPC, &VT and CSEInfo to the Combiner base,
// binds RuleConfig and STI, derives RBI/TRI/TII from the subtarget, constructs
// Helper, and then appends the initialisers emitted by the generated .inc
// file. The body itself is empty.
// NOTE(review): most of the parameter list and at least one initialiser line
// are missing from this listing.
117AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
120 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
122 : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
123 RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
124 TII(*STI.getInstrInfo()),
125 Helper(Observer, B, false, &VT, MDT, LI),
// Generated member initialisers are spliced in here.
127#include "AMDGPUGenRegBankGICombiner.inc"
129{
130}
131
// Boolean predicate on a register, used by getAsVgpr and the med3 matchers.
// NOTE(review): the body is missing from this listing; presumably it reports
// whether Reg is assigned to the VGPR register bank -- TODO confirm.
132bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
134}
135
// Body of a helper whose signature line is missing from this listing
// (presumably getAsVgpr(Register Reg), the helper called from applyMed3 --
// TODO confirm).
// Step 1: a register that already passes isVgprRegBank is returned unchanged.
137 if (isVgprRegBank(Reg))
138 return Reg;
139
140
// Step 2: scan the instructions using Reg; if one of them is a COPY whose
// `Def` passes isVgprRegBank, reuse that register. The line defining `Def`
// is missing here.
141 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
143 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
144 return Def;
145 }
146
147
// Step 3: otherwise assign the VGPR bank to `VgprReg` and return it. The line
// that creates `VgprReg` is missing here.
149 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
150 return VgprReg;
151}
152
// Maps a min or max opcode to the {Min, Max, Med} opcode triple of its family:
//   G_SMIN/G_SMAX                 -> G_AMDGPU_SMED3
//   G_UMIN/G_UMAX                 -> G_AMDGPU_UMED3
//   G_FMINNUM/G_FMAXNUM           -> G_AMDGPU_FMED3
//   G_FMINNUM_IEEE/G_FMAXNUM_IEEE -> G_AMDGPU_FMED3
153AMDGPURegBankCombinerImpl::MinMaxMedOpc
154AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
155 switch (Opc) {
// NOTE(review): as listed, `default:` has no statement and falls through into
// the signed case, so any unsupported opcode would be treated as signed
// min/max. One line is missing from the listing at this point -- confirm
// against the full source that it rejects unsupported opcodes.
156 default:
158 case AMDGPU::G_SMAX:
159 case AMDGPU::G_SMIN:
160 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
161 case AMDGPU::G_UMAX:
162 case AMDGPU::G_UMIN:
163 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
164 case AMDGPU::G_FMAXNUM:
165 case AMDGPU::G_FMINNUM:
166 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
// Both floating-point families share the same med3 opcode.
167 case AMDGPU::G_FMAXNUM_IEEE:
168 case AMDGPU::G_FMINNUM_IEEE:
169 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
170 AMDGPU::G_AMDGPU_FMED3};
171 }
172}
173
// Pattern-matching helper shared by the med3/clamp matchers. m_Cst is the
// constant matcher type and CstTy the type that receives a matched constant.
// Callers pass the opcode triple from getMinMaxPair and, on a true result,
// read the matched value from Val and the two constants from K0 and K1.
// NOTE(review): almost all of the match expression is missing from this
// listing; only the tails that bind K1 and K0 through m_Cst are visible.
174template <class m_Cst, typename CstTy>
175bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
176 MachineRegisterInfo &MRI,
177 MinMaxMedOpc MMMOpc, Register &Val,
178 CstTy &K0, CstTy &K1) const {
179
180
181
182
183
184
190 m_Cst(K1)),
193 m_Cst(K0))));
194}
195
// Decides whether the integer min/max instruction MI can be replaced by a
// signed/unsigned med3. On success fills MatchInfo with the med3 opcode, the
// matched value and the registers of the two constants, and returns true.
196bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
197 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
// Only destinations that pass isVgprRegBank are considered.
198 Register Dst = MI.getOperand(0).getReg();
199 if (!isVgprRegBank(Dst))
200 return false;
201
202
// A type-based rejection follows; its condition line is missing from this
// listing.
203 LLT Ty = MRI.getType(Dst);
205 return false;
206
207 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
// NOTE(review): the template argument of std::optional and the declaration of
// `Val` were lost in this listing.
209 std::optional K0, K1;
210
211 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))
212 return false;
213
// Reject when K0 is greater than K1, compared signed for SMED3 and unsigned
// for UMED3.
214 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
215 return false;
216 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
217 return false;
218
219 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
220 return true;
221}
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
// Floating-point counterpart of matchIntMinMaxToMed3: decides whether MI can
// be replaced by the med3 opcode of its family and, if so, fills MatchInfo.
241bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
242 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
243 Register Dst = MI.getOperand(0).getReg();
244 LLT Ty = MRI.getType(Dst);
245
246
// A type-based rejection; its condition line is missing from this listing.
248 return false;
249
250 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
251
// NOTE(review): the template argument of std::optional and the declaration of
// `Val` were lost in this listing.
253 std::optional K0, K1;
254
255 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))
256 return false;
257
// The constants must satisfy K0 <= K1.
258 if (K0->Value > K1->Value)
259 return false;
260
261
262
263
264
265
266
267
269
// Inside an outer condition whose opening line is missing: each constant must
// either have a single non-debug use or be an inline constant.
// NOTE(review): the object in front of `.hasOneNonDBGUse` was lost in this
// listing.
270 if ((.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
271 (.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
272 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
273 return true;
274 }
275 }
276
277 return false;
278}
279
// Decides whether the floating-point min/max pair rooted at MI clamps a value
// to [0.0, 1.0]. On success the clamped value is written to `Reg` and true is
// returned.
// NOTE(review): the second signature line (declaring `Reg`) is missing from
// this listing.
280bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
282
283 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
// NOTE(review): the template argument of std::optional and the declaration of
// `Val` were lost in this listing.
285 std::optional K0, K1;
286
287 if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1))
288 return false;
289
// The bounds must be exactly 0.0 and 1.0.
290 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
291 return false;
292
293
294
295
296
// Final legality condition. Only its first line is visible: IEEE mode and
// DX10 clamp both enabled and MI being G_FMINNUM_IEEE. The rest of the
// condition is missing from this listing.
297 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
300 Reg = Val;
301 return true;
302 }
303
304 return false;
305}
306
307
308
309
310
311
312
313
314
315
// Decides whether a med3 instruction can be replaced by a clamp; on success
// the value to clamp is written to `Reg` and true is returned.
// NOTE(review): this listing is heavily truncated: the rest of the signature,
// the definitions of Src0/Src1/Src2, Val and Op3, and the statements guarded
// by the isFCst checks are all missing.
316bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
318
319
323
// Three checks on which sources are G_FCONSTANT. The statements they guard
// are not visible; presumably they reorder the sources -- TODO confirm.
324 if (isFCst(Src0) && !isFCst(Src1))
326 if (isFCst(Src1) && !isFCst(Src2))
328 if (isFCst(Src0) && !isFCst(Src1))
331 return false;
332
334
// Local predicate on Op3. It takes a G_FCONSTANT-specific path (not visible)
// and otherwise returns false.
335 auto isOp3Zero = [&]() {
337 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
339 return false;
340 };
341
342
343
344
// Final legality condition; only the IEEE && DX10Clamp part is visible.
346 (getIEEE() && getDX10Clamp() &&
348 Reg = Val;
349 return true;
350 }
351
352 return false;
353}
354
// Replaces MI with a G_AMDGPU_CLAMP of `Reg` that writes MI's destination
// operand and carries MI's flags, then erases MI.
// NOTE(review): the second signature line (declaring `Reg`) is missing from
// this listing.
355void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
357 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
358 MI.getFlags());
359 MI.eraseFromParent();
360}
361
362void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
363 Med3MatchInfo &MatchInfo) const {
364 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
365 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
366 getAsVgpr(MatchInfo.Val2)},
367 MI.getFlags());
368 MI.eraseFromParent();
369}
370
// Rewrites the shift MI so that its amount is computed as
// and(anyext(AmtReg), Mask) instead of using `Ext`. The new shift keeps the
// opcode, destination and shifted source of MI, and MI is erased.
// NOTE(review): the lines defining AmtReg, ExtAmtTy and the Mask constant's
// arguments are missing from this listing.
371void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
372 MachineInstr &MI, MachineInstr &Ext) const {
373 unsigned ShOpc = MI.getOpcode();
// Only the three shift opcodes are expected here.
374 assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
375 ShOpc == AMDGPU::G_ASHR);
377
379 Register ShDst = MI.getOperand(0).getReg();
380 Register ShSrc = MI.getOperand(1).getReg();
381
383 LLT AmtTy = MRI.getType(AmtReg);
384
// Register bank of the original amount; reused for every new register below.
385 auto &RB = *MRI.getRegBank(AmtReg);
386
387 auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
388 auto Mask = B.buildConstant(
390 auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
391 B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});
392
// The newly created registers get the bank of the original amount.
393 MRI.setRegBank(NewExt.getReg(0), RB);
394 MRI.setRegBank(Mask.getReg(0), RB);
395 MRI.setRegBank(And.getReg(0), RB);
396 MI.eraseFromParent();
397}
398
// Tries to fold a pattern rooted at MI into one of the G_AMDGPU_LOAD_D16_*
// pseudo loads by delegating to applyD16Load; returns false when nothing
// matches. Two structurally identical halves are visible: the first selects
// the D16_LO opcodes, the second the D16_HI opcodes.
// NOTE(review): the pattern-match conditions that open each half, and the
// definitions of Dst and LoadSize, are missing from this listing.
399bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
401 MachineInstr *Load, *SextLoad;
// CleanLo16 has every bit set except the low 16; CleanHi16 has only the low
// 16 bits set. The first literal does not fit in a signed 64-bit value, so it
// relies on the unsigned-to-signed conversion.
402 const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
403 const int64_t CleanHi16 = 0x000000000000FFFF;
404
405
410
// D16_LO half, zero-extending load: 8-bit and 16-bit memory sizes map to the
// U8 and plain variants; any other size is rejected.
411 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
412 const MachineMemOperand *MMO = *Load->memoperands_begin();
414 if (LoadSize == 8)
415 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);
416 if (LoadSize == 16)
417 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);
418 return false;
419 }
420
// D16_LO half, sign-extending load: requires a G_SEXTLOAD and maps to the I8
// variant. The surrounding match and the second rejection condition are not
// visible.
422 Load, MRI,
424 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
425 return false;
426
429 return false;
430
431 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
432 }
433
434 return false;
435 }
436
437
442
// D16_HI half, zero-extending load: same size dispatch as above.
443 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
444 const MachineMemOperand *MMO = *Load->memoperands_begin();
446 if (LoadSize == 8)
447 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);
448 if (LoadSize == 16)
449 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);
450 return false;
451 }
452
// D16_HI half, sign-extending load: maps to the I8 variant.
454 Load, MRI,
456 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
457 return false;
460 return false;
461
462 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
463 }
464
465 return false;
466 }
467
468 return false;
469}
470
// Apply step used by combineD16Load. It receives the D16 opcode to emit, the
// instruction being replaced, the narrow load that feeds it and a 32-bit
// source register, and returns true.
// NOTE(review): the statements that build the new instruction are missing
// from this listing; only the final `return true` is visible.
471bool AMDGPURegBankCombinerImpl::applyD16Load(
472 unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
473 Register SrcReg32ToOverwriteD16) const {
478 return true;
479}
480
// Returns the SIModeRegisterDefaults obtained from the machine function's
// info object; getIEEE() and getDX10Clamp() read their fields from it.
// NOTE(review): the template argument of MF.getInfo<...>() was lost in this
// listing.
481SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
482 return MF.getInfo()->getMode();
483}
484
485bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
486
487bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
488 return getMode().DX10Clamp;
489}
490
491bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
492 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
493}
494
495bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
496 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
497}
498
// Predicate on a pair of instructions K0/K1. It returns false unless both are
// G_FCONSTANT (checked via isFCst).
// NOTE(review): the body of the `if` is missing from this listing; presumably
// it compares the two constants against 0.0 and 1.0 -- TODO confirm.
499bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
500 MachineInstr *K1) const {
501 if (isFCst(K0) && isFCst(K1)) {
506 }
507 return false;
508}
509
510
511
512
513class AMDGPURegBankCombiner : public MachineFunctionPass {
514public:
515 static char ID;
516
517 AMDGPURegBankCombiner(bool IsOptNone = false);
518
519 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
520
521 bool runOnMachineFunction(MachineFunction &MF) override;
522
523 void getAnalysisUsage(AnalysisUsage &AU) const override;
524
525private:
526 bool IsOptNone;
527 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
528};
529}
530
// Declares the analyses this pass requires and preserves. One required and
// preserved pair is registered unconditionally; a second pair is added only
// when IsOptNone is false.
// NOTE(review): the template arguments naming the analyses were lost in this
// listing, and several lines of the body are missing.
531void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
535 AU.addRequired();
536 AU.addPreserved();
537 if (!IsOptNone) {
538 AU.addRequired();
539 AU.addPreserved();
540 }
542}
543
// Constructor. Passes ID to the MachineFunctionPass base, stores IsOptNone and
// parses the rule configuration from the command line.
// NOTE(review): the statement executed when parseCommandLineOption() fails is
// missing from this listing.
544AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
545 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
546 if (!RuleConfig.parseCommandLineOption())
548}
549
// Runs the combiner over MF: gathers the analyses and subtarget information,
// configures a CombinerInfo, constructs AMDGPURegBankCombinerImpl and returns
// the result of combineMachineInstrs().
// NOTE(review): several lines (the early-exit condition, the definitions of
// F, ST, VT and MDT, the EnableOpt expression) and all getAnalysis<...>()
// template arguments are missing from this listing.
550bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
// Early exit; its guarding condition is not visible.
552 return false;
553 auto *TPC = &getAnalysis();
555 bool EnableOpt =
557
560 &getAnalysis().get(MF);
561
562 const auto *LI = ST.getLegalizerInfo();
// A dominator tree is fetched only when IsOptNone is false; otherwise null is
// used.
564 IsOptNone ? nullptr
565 : &getAnalysis().getDomTree();
566
567 CombinerInfo CInfo( false, true,
568 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
569
// Limit the combiner to a single iteration.
570 CInfo.MaxIterations = 1;
572
573
// Full dead-code elimination is switched off for this combiner.
574 CInfo.EnableFullDCE = false;
// No CSE info is supplied (nullptr).
575 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *VT, nullptr,
576 RuleConfig, ST, MDT, LI);
577 return Impl.combineMachineInstrs();
578}
579
// Definition of the pass ID declared in the class.
580char AMDGPURegBankCombiner::ID = 0;
// Fragments of the pass-registration macro invocations that carry the pass
// description string.
// NOTE(review): the macro names and remaining arguments are missing from this
// listing; presumably INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_END -- TODO
// confirm.
582 "Combine AMDGPU machine instrs after regbankselect",
583 false, false)
587 "Combine AMDGPU machine instrs after regbankselect", false,
589
// Body of the factory function createAMDGPURegBankCombiner(bool IsOptNone),
// whose signature line is missing from this listing. It returns a newly
// allocated AMDGPURegBankCombiner constructed with IsOptNone.
591 return new AMDGPURegBankCombiner(IsOptNone);
592}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
const TargetInstrInfo & TII
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
FunctionPass class - This class is used to implement most global optimizations.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
TypeSize getValue() const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
NodeAddr< DefNode * > Def
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition AMDGPURegBankCombiner.cpp:590
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...