LLVM: lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

29

30#define GET_GICOMBINER_DEPS

31#include "AMDGPUGenPreLegalizeGICombiner.inc"

32#undef GET_GICOMBINER_DEPS

33

34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"

35

36using namespace llvm;

38namespace {

39

40#define GET_GICOMBINER_TYPES

41#include "AMDGPUGenPreLegalizeGICombiner.inc"

42#undef GET_GICOMBINER_TYPES

43

// Combiner implementation for the AMDGPU pre-legalizer combine pass. Derives
// from Combiner and pulls its rule-matching members in from the generated
// "AMDGPUGenPreLegalizeGICombiner.inc" file (see the GET_GICOMBINER_* sections
// below).
// NOTE(review): the inner listing numbers skip in several places (46->49,
// 51->54, 54->57, 59->61, 65->67, 68->71, 72->74); the declarations on those
// lines are not visible in this extract.
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {

45protected:

// Rule configuration shared with the owning pass; held by reference.
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;

49

50public:

// Constructor; only the RuleConfig parameter is visible in this extract.
51 AMDGPUPreLegalizerCombinerImpl(

54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

57

// Fixed, human-readable name of this combiner implementation.
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }

59

// Attempts every available combine on a single instruction; returns true when
// one of them applied.
61 bool tryCombineAll(MachineInstr &I) const override;

62

// Data handed from matchClampI64ToI16 to applyClampI64ToI16. Cmp1 and Cmp2 are
// the two constant clamp bounds; the apply step takes std::min/std::max of
// them, so neither is fixed as the lower or upper bound here.
// NOTE(review): applyClampI64ToI16 also reads MatchInfo.Origin; its
// declaration (listing line 66) is missing from this extract.
63 struct ClampI64ToI16MatchInfo {

64 int64_t Cmp1 = 0;

65 int64_t Cmp2 = 0;

67 };

68

// Tail of the matchClampI64ToI16 declaration (fills MatchInfo on success).
71 ClampI64ToI16MatchInfo &MatchInfo) const;

72

// Tail of the applyClampI64ToI16 declaration (consumes a filled MatchInfo).
74 const ClampI64ToI16MatchInfo &MatchInfo) const;

75

76private:

// Generated class members. AMDGPUSubtarget is temporarily aliased to
// GCNSubtarget while the generated code is included, then restored.
77#define GET_GICOMBINER_CLASS_MEMBERS

78#define AMDGPUSubtarget GCNSubtarget

79#include "AMDGPUGenPreLegalizeGICombiner.inc"

80#undef GET_GICOMBINER_CLASS_MEMBERS

81#undef AMDGPUSubtarget

82};

83

84#define GET_GICOMBINER_IMPL

85#define AMDGPUSubtarget GCNSubtarget

86#include "AMDGPUGenPreLegalizeGICombiner.inc"

87#undef AMDGPUSubtarget

88#undef GET_GICOMBINER_IMPL

89

// Constructs the combiner implementation: forwards MF, CInfo, TPC, &VT and
// CSEInfo to the Combiner base, stores RuleConfig and STI, builds Helper from
// (Observer, B, true, &VT, MDT, LI, STI), and then splices in initializers
// from the generated .inc file.
// NOTE(review): most of the parameter list (listing lines 91-92, 94) and lines
// 97 and 99 are missing from this extract, so the parameter types and the
// macro that selects the included section are not visible here.
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(

93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,

95 : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),

96 Helper(Observer, B, true, &VT, MDT, LI, STI),

98#include "AMDGPUGenPreLegalizeGICombiner.inc"

// The constructor body is intentionally empty; all work is in the
// initializer list.
100{

101}

102

// Tries all combines on MI. tryCombineAllImpl runs first (presumably the
// TableGen-generated matcher from the .inc file -- confirm); if it reports a
// change this returns true immediately. Otherwise a manual opcode switch is
// consulted, and false is returned when nothing applied.
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {

104 if (tryCombineAllImpl(MI))

105 return true;

106

107 switch (MI.getOpcode()) {

// NOTE(review): the statement handling this case (listing line 109) is missing
// from this extract; as shown, the case label has no visible body.
108 case TargetOpcode::G_SHUFFLE_VECTOR:

110 }

111

112 return false;

113}

114

115bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(

116 MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,

117 ClampI64ToI16MatchInfo &MatchInfo) const {

118 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

119

120

121 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());

123 return false;

124

125 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());

127 return false;

128

130

131 auto IsApplicableForCombine = [&MatchInfo]() -> bool {

132 const auto Cmp1 = MatchInfo.Cmp1;

133 const auto Cmp2 = MatchInfo.Cmp2;

134 const auto Diff = std::abs(Cmp2 - Cmp1);

135

136

137

138 if (Diff == 0 || Diff == 1)

139 return false;

140

141 const int64_t Min = std::numeric_limits<int16_t>::min();

142 const int64_t Max = std::numeric_limits<int16_t>::max();

143

144

145 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||

146 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));

147 };

148

149

154 return IsApplicableForCombine();

155 }

156 }

157

162 return IsApplicableForCombine();

163 }

164 }

165

166 return false;

167}

168

169

170

171

172

173

174

175

// Rewrites a matched clamp-then-truncate of a 64-bit value (MI) into a
// pack-convert followed by a signed median-of-three, then erases MI.
//   MI        - the instruction being replaced; its operand 0 is the result.
//   MatchInfo - filled by matchClampI64ToI16: Origin is the value being
//               clamped, Cmp1/Cmp2 are the two constant bounds.
// NOTE(review): listing lines 180-181 and 187 are missing from this extract;
// S32 and V2S16 are presumably LLT constants declared there -- confirm.
176void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(

177 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {

178

179 Register Src = MatchInfo.Origin;

182

// Split the source into S32 pieces; pieces 0 and 1 are used below.
183 auto Unmerge = B.buildUnmerge(S32, Src);

184

185 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);

186

// Feed both pieces into G_AMDGPU_CVT_PK_I16_I32, producing a V2S16 value and
// carrying over MI's flags.
188 auto CvtPk =

189 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},

190 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());

191

// Order the two bounds so the smaller is the lower limit regardless of which
// of Cmp1/Cmp2 it came from, and materialize both as S32 constants.
192 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);

193 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);

194 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);

195 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);

196

// Reinterpret the packed V2S16 result as a single S32 for the median op.
197 auto Bitcast = B.buildBitcast({S32}, CvtPk);

198

// G_AMDGPU_SMED3 of (lower bound, value, upper bound) performs the clamp.
199 auto Med3 = B.buildInstr(

200 AMDGPU::G_AMDGPU_SMED3, {S32},

201 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},

202 MI.getFlags());

203

// Truncate into MI's original destination register, then drop MI.
204 B.buildTrunc(MI.getOperand(0).getReg(), Med3);

205

206 MI.eraseFromParent();

207}

208

209

210

211

212class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {

213public:

214 static char ID;

215

216 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);

217

218 StringRef getPassName() const override {

219 return "AMDGPUPreLegalizerCombiner";

220 }

221

222 bool runOnMachineFunction(MachineFunction &MF) override;

223

224 void getAnalysisUsage(AnalysisUsage &AU) const override;

225

226private:

227 bool IsOptNone;

228 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;

229};

230}

231

// Registers the analyses this pass requires and preserves on AU.
// NOTE(review): the template arguments of every addRequired/addPreserved call
// were stripped from this extract, and listing lines 233-235 and 245 are
// missing, so the concrete analyses cannot be read off here.
232void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {

236 AU.addRequired();

237 AU.addPreserved();

// Only requested when IsOptNone is false. Presumably the dominator tree,
// since runOnMachineFunction calls getDomTree() only in that case -- confirm.
238 if (!IsOptNone) {

239 AU.addRequired();

240 AU.addPreserved();

241 }

242

243 AU.addRequired();

244 AU.addPreserved();

246}

247

// Creates the pass, recording whether it runs in the opt-none configuration,
// and parses the rule configuration's command-line option.
248AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)

249 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {

// NOTE(review): the statement executed when parsing fails (listing line 251)
// is missing from this extract.
250 if (!RuleConfig.parseCommandLineOption())

252}

253

// Runs the pre-legalizer combiner over MF: gathers the required analyses,
// configures a CombinerInfo, constructs an AMDGPUPreLegalizerCombinerImpl and
// returns the result of its combineMachineInstrs().
// NOTE(review): this extract is missing listing lines 255, 258, 260-261, 265,
// 269-270, 277 and 282, and the template arguments of getAnalysis were
// stripped; the comments below describe only what is visible.
254bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {

// Early exit; the guarding condition (listing line 255) is not visible.
256 return false;

257 auto *TPC = &getAnalysis();

259 bool EnableOpt =

262 &getAnalysis().get(MF);

263

264

// Obtain the CSE info using the CSE configuration supplied by TPC.
266 getAnalysis().getCSEWrapper();

267 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());

268

// With IsOptNone set, no dominator tree is handed to the combiner.
271 IsOptNone ? nullptr

272 : &getAnalysis().getDomTree();

273 CombinerInfo CInfo( true, false,

274 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());

275

// Limit the combiner to a single iteration.
276 CInfo.MaxIterations = 1;

278

279

280 CInfo.EnableFullDCE = true;

281 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,

283 return Impl.combineMachineInstrs();

284}

285

286char AMDGPUPreLegalizerCombiner::ID = 0;

288 "Combine AMDGPU machine instrs before legalization",

289 false, false)

293 "Combine AMDGPU machine instrs before legalization", false,

295

297 return new AMDGPUPreLegalizerCombiner(IsOptNone);

298}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

#define GET_GICOMBINER_CONSTRUCTOR_INITS

amdgpu aa AMDGPU Address space based Alias Analysis Wrapper

This contains common combine transformations that may be used in a combine pass.

This file declares the targeting of the Machinelegalizer class for AMDGPU.

Provides AMDGPU specific target descriptions.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Provides analysis for continuously CSEing during GISel passes.

This contains common combine transformations that may be used in a combine pass, or by the target else...

Option class for Targets to specify which operations are combined how and when.

This contains the base class for all Combiners generated by TableGen.

AMD GCN specific subclass of TargetSubtarget.

Provides analysis for querying information about KnownBits during GISel passes.

Contains matchers for matching SSA Machine Instructions.

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static StringRef getName(Value *V)

Target-Independent Code Generator Pass Configuration Options pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

bool tryCombineShuffleVector(MachineInstr &MI) const

Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.

FunctionPass class - This class is used to implement most global optimizations.

const LegalizerInfo * getLegalizerInfo() const override

Simple wrapper that does the following.

To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)

Get a low-level fixed-width vector of some number of elements and element width.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineFunctionProperties & getProperties() const

Get the function properties.

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

Representation of each machine instruction.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Wrapper class representing virtual and physical registers.

CodeGenOptLevel getOptLevel() const

Returns the optimization level: None, Less, Default, or Aggressive.

Target-Independent Code Generator Pass Configuration Options.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ Bitcast

Perform the operation on a different, but equivalently sized type.

operand_type_match m_Reg()

ConstantMatch< APInt > m_ICst(APInt &Cst)

bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)

BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)

Modify analysis usage so it preserves passes required for the SelectionDAG fallback.

FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)

Definition AMDGPUPreLegalizerCombiner.cpp:296

@ SinglePass

Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...