LLVM: lib/Target/AMDGPU/SIInsertHardClauses.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

40

41using namespace llvm;

42

43#define DEBUG_TYPE "si-insert-hard-clauses"

44

47 cl::desc("Maximum number of memory instructions to "

48 "place in the same hard clause"),

50

51namespace {

52

// Category assigned to each machine instruction by getHardClauseType().
// Enumerator order is significant: run() compares a type against
// LAST_REAL_HARDCLAUSE_TYPE to decide whether it may start a clause.
53enum HardClauseType {

54

55

56
// Returned in the GFX10 / hasGFX1250Insts() branch of getHardClauseType().
57 HARDCLAUSE_VMEM,

58
// Also returned only in the GFX10 / hasGFX1250Insts() branch.
59 HARDCLAUSE_FLAT,

60

61

62

63
// The LOAD/STORE/ATOMIC families below are returned for all other subtargets:
// ATOMIC when the instruction may both load and store, LOAD when it may load
// only, STORE when it does not load.
64 HARDCLAUSE_MIMG_LOAD,

65 HARDCLAUSE_MIMG_STORE,

66 HARDCLAUSE_MIMG_ATOMIC,

67 HARDCLAUSE_MIMG_SAMPLE,

68

69 HARDCLAUSE_VMEM_LOAD,

70 HARDCLAUSE_VMEM_STORE,

71 HARDCLAUSE_VMEM_ATOMIC,

72

73 HARDCLAUSE_FLAT_LOAD,

74 HARDCLAUSE_FLAT_STORE,

75 HARDCLAUSE_FLAT_ATOMIC,

76
// Returned when the instruction's MIMG base opcode info has its BVH flag set.
77 HARDCLAUSE_BVH,

78

79

80

81
// NOTE(review): no visible code path returns this value -- confirm its use.
82 HARDCLAUSE_LDS,

83

84 HARDCLAUSE_SMEM,

85
// NOTE(review): no visible code path returns this value -- confirm its use.
86 HARDCLAUSE_VALU,
// Upper bound of the "real" types; anything greater is a bookkeeping category.
87 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

88

89

90
// Assigned to S_NOP. It never starts or ends a clause and is counted in the
// clause length only once a later real instruction joins the clause.
91 HARDCLAUSE_INTERNAL,

92
// Assigned to meta instructions. They neither break a clause nor count
// towards its length.
93 HARDCLAUSE_IGNORE,

94

95
// Assigned to every other instruction. It always terminates an open clause
// and can never start one.
96 HARDCLAUSE_ILLEGAL,

97};

98

99class SIInsertHardClauses {

100public:

101 const GCNSubtarget *ST = nullptr;

102

// Classify MI for hard-clause formation.
//
// Loads -- and stores too when the subtarget reports shouldClusterStores() --
// are mapped to one of the memory clause types. S_NOP maps to
// HARDCLAUSE_INTERNAL, meta instructions to HARDCLAUSE_IGNORE, and everything
// else to HARDCLAUSE_ILLEGAL.
//
// NOTE(review): several condition lines of this function (listing lines
// 107-108, 110, 116, 119-120, 122, 125, 131-132, 137, 144) are missing from
// this extract, so the exact predicate guarding some returns is not visible.
103 HardClauseType getHardClauseType(const MachineInstr &MI) {

104 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
// GFX10 and subtargets with GFX1250 instructions use the coarse VMEM/FLAT
// classification.
105 if (ST->getGeneration() == AMDGPUSubtarget::GFX10 ||

106 ST->hasGFX1250Insts()) {
// On subtargets with the NSA clause bug, an instruction using the GFX10 NSA
// MIMG encoding is reported as ILLEGAL so it is never placed in a clause.
109 if (ST->hasNSAClauseBug()) {

111 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)

112 return HARDCLAUSE_ILLEGAL;

113 }

114 return HARDCLAUSE_VMEM;

115 }

117 return HARDCLAUSE_FLAT;
// All other subtargets use the finer-grained per-kind types.
118 } else {

121 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =

123 if (BaseInfo->BVH)

124 return HARDCLAUSE_BVH;

126 return HARDCLAUSE_MIMG_SAMPLE;
// mayLoad && mayStore -> atomic; mayLoad only -> load; otherwise -> store.
127 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC

128 : HARDCLAUSE_MIMG_LOAD

129 : HARDCLAUSE_MIMG_STORE;

130 }
// Same load/store/atomic split as above, for the VMEM family.
133 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC

134 : HARDCLAUSE_VMEM_LOAD

135 : HARDCLAUSE_VMEM_STORE;

136 }
// Same load/store/atomic split as above, for the FLAT family.
138 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC

139 : HARDCLAUSE_FLAT_LOAD

140 : HARDCLAUSE_FLAT_STORE;

141 }

142 }

143

145 return HARDCLAUSE_SMEM;

146 }

147

148

149

150

151
// Non-memory instructions: S_NOP may sit inside a clause, meta instructions
// are transparent, and anything else cannot be part of a clause.
152 if (MI.getOpcode() == AMDGPU::S_NOP)

153 return HARDCLAUSE_INTERNAL;

154 if (MI.isMetaInstruction())

155 return HARDCLAUSE_IGNORE;

156 return HARDCLAUSE_ILLEGAL;

157 }

158

159

// State of the clause currently being accumulated by run().
//
// NOTE(review): run() also reads and writes a `BaseOps` member that is
// declared on listing line 175, which is missing from this extract.
160 struct ClauseInfo {

161
// Type shared by the real instructions of the clause.
162 HardClauseType Type = HARDCLAUSE_ILLEGAL;

163
// Instruction that started the clause.
164 MachineInstr *First = nullptr;

165
// Most recent non-internal instruction added to the clause.
166 MachineInstr *Last = nullptr;

167

168
// Number of instructions counted in the clause, from First to Last. Internal
// instructions are included only once a later real instruction follows them.
169 unsigned Length = 0;

170

171

172
// Internal instructions seen after Last that are not yet counted in Length.
173 unsigned TrailingInternalLength = 0;

174

176 };

177

// Materialize the clause described by CI.
//
// Returns false without changing anything when the clause holds a single
// instruction (First == Last), and true otherwise.
//
// NOTE(review): listing lines 186 and 188 are missing from this extract. The
// visible fragments suggest that an instruction is built with immediate
// operand `Length - 1` and that a call takes the range ending just after
// CI.Last -- confirm against the full source.
178 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
// A one-instruction clause is not emitted.
179 if (CI.First == CI.Last)

180 return false;

181 assert(CI.Length <= ST->maxHardClauseLength() &&

182 "Hard clause is too long!");

183

184 auto &MBB = *CI.First->getParent();

185 auto ClauseMI =
// The immediate operand is the clause length minus one.
187 .addImm(CI.Length - 1);

189 std::next(CI.Last->getIterator()));

190 return true;

191 }

192

// Scan every basic block of MF and group runs of compatible instructions
// into hard clauses via emitClause().
//
// Returns false early when the subtarget has no hard clauses or when the
// effective maximum clause length is <= 1.
//
// NOTE(review): listing lines 194, 198, 200-201, 209, 217-218, 220, 239 and
// 270 are missing from this extract. These include the initialisation of ST,
// the declarations of MaxClauseLength, Changed, BaseOps and Dummy3, and the
// final return statement (presumably `return Changed;` -- confirm).
193 bool run(MachineFunction &MF) {

195 if (!ST->hasHardClauses())

196 return false;

197
// Tail of a call whose start is missing; the visible arguments name the
// "amdgpu-hard-clause-length-limit" key with a default of 255.
199 "amdgpu-hard-clause-length-limit", 255);
// Never exceed what the subtarget supports.
202 MaxClauseLength = std::min(MaxClauseLength, ST->maxHardClauseLength());
// A clause of at most one instruction is never emitted, so there is no work.
203 if (MaxClauseLength <= 1)

204 return false;

205

206 const SIInstrInfo *SII = ST->getInstrInfo();

207 const TargetRegisterInfo *TRI = ST->getRegisterInfo();

208

210 for (auto &MBB : MF) {
// Clause state is per basic block, so a clause never spans two blocks.
211 ClauseInfo CI;

212 for (auto &MI : MBB) {

213 HardClauseType Type = getHardClauseType(MI);

214

215 int64_t Dummy1;

216 bool Dummy2;
// Only real clause candidates are subjected to the check below.
// NOTE(review): the call on listing line 220 is missing; when its condition
// holds the instruction is demoted to ILLEGAL.
219 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {

221 Dummy3, TRI)) {

222

223
// Demoted so that it neither joins nor starts a clause.
224 Type = HARDCLAUSE_ILLEGAL;

225 }

226 }

227
// Close the open clause when it is full, or when a non-internal, non-ignored
// instruction arrives whose type differs from the clause's type or which
// satisfies the second alternative (partly missing; ends in `2, 2`).
228 if (CI.Length == MaxClauseLength ||

229 (CI.Length && Type != HARDCLAUSE_INTERNAL &&

230 Type != HARDCLAUSE_IGNORE &&

231 (Type != CI.Type ||

232

233

234

235

236

237

238

240 2, 2)))) {

241

242 Changed |= emitClause(CI, SII);

243 CI = ClauseInfo();

244 }

245

246 if (CI.Length) {

247
// A clause is open: extend it. Ignored instructions leave the state untouched.
248 if (Type != HARDCLAUSE_IGNORE) {

249 if (Type == HARDCLAUSE_INTERNAL) {
// Count internal instructions provisionally; they only become part of the
// clause if a real instruction follows.
250 ++CI.TrailingInternalLength;

251 } else {
// A real instruction: count it, fold in the pending internal instructions,
// and make it the new end of the clause.
252 ++CI.Length;

253 CI.Length += CI.TrailingInternalLength;

254 CI.TrailingInternalLength = 0;

255 CI.Last = &MI;

256 CI.BaseOps = std::move(BaseOps);

257 }

258 }

259 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {

260
// No clause is open and MI has a real type: start a new clause of length 1.
261 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};

262 }

263 }

264

265
// Flush a clause that is still open at the end of the block.
266 if (CI.Length)

267 Changed |= emitClause(CI, SII);

268 }

269

271 }

272};

273

275public:

276 static char ID;

// Construct the legacy pass, registering it under the static pass ID.
277 SIInsertHardClausesLegacy() : MachineFunctionPass(ID) {}

278

// Legacy pass-manager entry point: runs a freshly constructed
// SIInsertHardClauses on MF and returns its result.
//
// NOTE(review): the condition guarding the early `return false` (listing
// line 280) is missing from this extract.
279 bool runOnMachineFunction(MachineFunction &MF) override {

281 return false;

282

283 return SIInsertHardClauses().run(MF);

284 }

285

// Declares the analysis requirements of the legacy pass.
//
// NOTE(review): the body (listing lines 287-288) is missing from this extract,
// so what it declares cannot be confirmed from here.
286 void getAnalysisUsage(AnalysisUsage &AU) const override {

289 }

290};

291

292}

293

297 if (!SIInsertHardClauses().run(MF))

299

302 return PA;

303}

304

305char SIInsertHardClausesLegacy::ID = 0;

306

308

310 false, false)

assert(UImm && (UImm != ~static_cast< T >(0)) && "Invalid immediate!")

Provides AMDGPU specific target descriptions.

Analysis containing CSE Info

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static cl::opt< unsigned > HardClauseLengthLimit("amdgpu-hard-clause-length-limit", cl::desc("Maximum number of memory instructions to " "place in the same hard clause"), cl::Hidden)

This file defines the SmallVector class.

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function; (2) modify terminator instructions in any way.

Represents analyses that only rely on functions' control flow.

uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const

For a string attribute Kind, parse attribute as an integer.

static LocationSize precise(uint64_t Value)

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition SIInsertHardClauses.cpp:295

static bool isVMEM(const MachineInstr &MI)

bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final

static bool isSMRD(const MachineInstr &MI)

bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override

static bool isSegmentSpecificFLAT(const MachineInstr &MI)

static bool isMIMG(const MachineInstr &MI)

static bool isFLAT(const MachineInstr &MI)

self_iterator getIterator()

LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)

LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)

finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).

char & SIInsertHardClausesID

Definition SIInsertHardClauses.cpp:307