LLVM: lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
/// Combiner implementation behind the AMDGPU pre-legalizer combine pass.
/// It derives from Combiner, pulls its generated class members in from
/// AMDGPUGenPreLegalizeGICombiner.inc, and declares the match-info type shared
/// by the matchClampI64ToI16 / applyClampI64ToI16 pair defined later in this
/// file.
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
 // Rule configuration, stored by reference: the referenced object has to
 // outlive this combiner.
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
49
50public:
 // Constructor. Only the RuleConfig parameter is visible here; the other
 // parameters are not shown.
51 AMDGPUPreLegalizerCombinerImpl(
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
57
 // Fixed name string identifying this combiner implementation.
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
59
 // Per-instruction combine entry point (overrides the base-class hook).
61 bool tryCombineAll(MachineInstr &I) const override;
62
 // Data handed from matchClampI64ToI16 to applyClampI64ToI16. Cmp1 and Cmp2
 // are the two comparison constants; both default to 0.
 // NOTE(review): applyClampI64ToI16 also reads MatchInfo.Origin, whose
 // declaration is not visible here -- confirm it is declared in this struct.
63 struct ClampI64ToI16MatchInfo {
64 int64_t Cmp1 = 0;
65 int64_t Cmp2 = 0;
67 };
68
 // Tail of a const member declaration taking a mutable match-info reference
 // -- presumably matchClampI64ToI16, whose definition ends the same way.
71 ClampI64ToI16MatchInfo &MatchInfo) const;
72
 // Tail of a const member declaration taking a const match-info reference
 // -- presumably applyClampI64ToI16, whose definition ends the same way.
74 const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76private:
 // Pull in the generated class members. AMDGPUSubtarget is aliased to
 // GCNSubtarget only for the duration of the include and undefined again
 // afterwards.
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
81#undef AMDGPUSubtarget
82};
83
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
87#undef AMDGPUSubtarget
88#undef GET_GICOMBINER_IMPL
89
/// Constructs the combiner implementation.
///
/// Forwards MF, CInfo, TPC, &VT and CSEInfo to the Combiner base class, stores
/// the rule configuration reference and STI, and builds the combiner Helper
/// from the observer, the builder B, the value-tracking object, MDT, LI and
/// STI. Most of the parameter list is not visible here.
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95 : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer, B, true, &VT, MDT, LI, STI),
 // Remaining member initializers come from the generated include
 // (presumably guarded by GET_GICOMBINER_CONSTRUCTOR_INITS -- the guard lines
 // are not visible here).
98#include "AMDGPUGenPreLegalizeGICombiner.inc"
 // Nothing left to do in the body.
100{
101}
102
/// Tries every available combine on \p MI.
///
/// The generated rules (tryCombineAllImpl) get the first attempt; if none of
/// them fires, the opcode is dispatched to the hand-written cases below.
/// Returns true as soon as the generated rules report success, and false when
/// nothing matched.
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
 // Generated rule set first.
104 if (tryCombineAllImpl(MI))
105 return true;
106
 // Opcode-specific handling. Only the G_SHUFFLE_VECTOR case label is visible;
 // the statement it executes is not shown (presumably a shuffle-vector combine
 // on the helper -- confirm).
107 switch (MI.getOpcode()) {
108 case TargetOpcode::G_SHUFFLE_VECTOR:
110 }
111
 // No combine applied.
112 return false;
113}
114
115bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
116 MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
117 ClampI64ToI16MatchInfo &MatchInfo) const {
118 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
119
120
121 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
123 return false;
124
125 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
127 return false;
128
130
131 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
132 const auto Cmp1 = MatchInfo.Cmp1;
133 const auto Cmp2 = MatchInfo.Cmp2;
134 const auto Diff = std::abs(Cmp2 - Cmp1);
135
136
137
138 if (Diff == 0 || Diff == 1)
139 return false;
140
141 const int64_t Min = std::numeric_limits<int16_t>::min();
142 const int64_t Max = std::numeric_limits<int16_t>::max();
143
144
145 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
146 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
147 };
148
149
154 return IsApplicableForCombine();
155 }
156 }
157
162 return IsApplicableForCombine();
163 }
164 }
165
166 return false;
167}
168
169
170
171
172
173
174
175
/// Rewrites the matched instruction \p MI into a pack-convert followed by a
/// signed median-of-three clamp and a truncate.
///
/// \param MI         instruction being replaced; it is erased at the end.
/// \param MatchInfo  source register (Origin) and the two clamp constants
///                   (Cmp1, Cmp2) recorded by the matcher.
///
/// NOTE(review): the declarations of S32 and V2S16 are not visible here;
/// presumably a 32-bit scalar type and a 2 x 16-bit vector type -- confirm.
176void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
177 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
178
 // Value that is being clamped, as recorded by the matcher.
179 Register Src = MatchInfo.Origin;
182
 // Split the source into S32 pieces; pieces 0 and 1 are used below.
183 auto Unmerge = B.buildUnmerge(S32, Src);
184
185 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
186
 // Feed both pieces to G_AMDGPU_CVT_PK_I16_I32, producing a V2S16 value and
 // carrying over MI's flags.
188 auto CvtPk =
189 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
190 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
191
 // The smaller constant becomes the lower bound and the larger the upper
 // bound, whichever order they were matched in; both are materialised as S32
 // constants.
192 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
193 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
194 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
195 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
196
 // Reinterpret the packed result as a single S32 so it can be an SMED3 operand.
197 auto Bitcast = B.buildBitcast({S32}, CvtPk);
198
 // G_AMDGPU_SMED3 over (lower bound, packed value, upper bound).
199 auto Med3 = B.buildInstr(
200 AMDGPU::G_AMDGPU_SMED3, {S32},
201 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
202 MI.getFlags());
203
 // Truncate the clamped value into MI's original destination register.
204 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
205
 // The replacement sequence is complete; remove the original instruction.
206 MI.eraseFromParent();
207}
208
209
210
211
/// MachineFunctionPass wrapper around the pre-legalizer combiner: it owns the
/// rule configuration and the opt-none flag, and drives
/// AMDGPUPreLegalizerCombinerImpl from runOnMachineFunction.
212class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
213public:
 // Pass identification; the value is defined later in this file.
214 static char ID;
215
 // \p IsOptNone defaults to false; when true, getAnalysisUsage skips one
 // required/preserved analysis pair and runOnMachineFunction passes a null
 // dominator tree.
216 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
217
 // Pass name as a fixed string.
218 StringRef getPassName() const override {
219 return "AMDGPUPreLegalizerCombiner";
220 }
221
 // Runs the combiner over \p MF.
222 bool runOnMachineFunction(MachineFunction &MF) override;
223
 // Declares the analyses this pass requires and preserves.
224 void getAnalysisUsage(AnalysisUsage &AU) const override;
225
226private:
 // Set once in the constructor.
227 bool IsOptNone;
 // Rule configuration; the constructor parses its command-line option and
 // runOnMachineFunction passes it to the implementation object.
228 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
229};
230}
231
/// Registers the analyses this pass requires and preserves on \p AU.
///
/// NOTE(review): the template arguments of every addRequired / addPreserved
/// call are not visible here, so the concrete analyses cannot be named from
/// this text; several statements of the function are also not shown.
232void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
 // First required + preserved pair, registered unconditionally.
236 AU.addRequired();
237 AU.addPreserved();
 // This pair is only requested when the pass was not created for opt-none
 // (presumably the dominator tree, which runOnMachineFunction only queries
 // when !IsOptNone -- confirm).
238 if (!IsOptNone) {
239 AU.addRequired();
240 AU.addPreserved();
241 }
242
 // Final required + preserved pair, registered unconditionally.
243 AU.addRequired();
244 AU.addPreserved();
246}
247
/// Creates the pass, recording whether it runs for an opt-none pipeline, and
/// parses the combiner rule command-line option into RuleConfig.
248AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
249 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
 // A failed parse is handled by a statement that is not visible here
 // (presumably a fatal error -- confirm).
250 if (!RuleConfig.parseCommandLineOption())
252}
253
/// Runs the pre-legalizer combiner over \p MF and returns whatever
/// Impl.combineMachineInstrs() reports.
///
/// NOTE(review): many statements of this function are only partially visible
/// (analysis template arguments, several declarations, the early-exit
/// condition); the comments below describe only what can be read here.
254bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
 // Early exit without changes; the guarding condition is not visible here.
256 return false;
257 auto *TPC = &getAnalysis();
 // EnableOpt is later handed to CombinerInfo; its initializer is not visible.
259 bool EnableOpt =
262 &getAnalysis().get(MF);
263
264
 // CSE info is obtained from the CSE wrapper using the pass config's CSE
 // configuration.
266 getAnalysis().getCSEWrapper();
267 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
268
 // No dominator tree is used in opt-none mode; otherwise it comes from the
 // corresponding analysis.
271 IsOptNone ? nullptr
272 : &getAnalysis().getDomTree();
 // Per the CombinerInfo constructor the arguments are, in order:
 // AllowIllegalOps, ShouldLegalizeIllegal, LegalizerInfo, OptEnabled, OptSize,
 // MinSize.
273 CombinerInfo CInfo( true, false,
274 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
275
 // Limit the combiner to a single iteration.
276 CInfo.MaxIterations = 1;
279
280
 // Enable the combiner's full dead-code elimination.
281 CInfo.EnableFullDCE = true;
282 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
283 return Impl.combineMachineInstrs();
284}
285
286char AMDGPUPreLegalizerCombiner::ID = 0;
288 "Combine AMDGPU machine instrs before legalization",
289 false, false)
293 "Combine AMDGPU machine instrs before legalization", false,
295
297 return new AMDGPUPreLegalizerCombiner(IsOptNone);
298}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by a pass if and only if it does not:
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
FunctionPass class - This class is used to implement most global optimizations.
const LegalizerInfo * getLegalizerInfo() const override
Simple wrapper that does the following.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition AMDGPUPreLegalizerCombiner.cpp:296
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...