LLVM: lib/Target/AMDGPU/SIFormMemoryClauses.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

22

23using namespace llvm;

24

25#define DEBUG_TYPE "si-form-memory-clauses"

26

27

28

31 cl::desc("Maximum length of a memory clause, instructions"));

32

33namespace {

34

35class SIFormMemoryClausesImpl {

37

38 bool canBundle(const MachineInstr &MI, const RegUse &Defs,

39 const RegUse &Uses) const;

41 void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;

42 bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,

44

50

51 unsigned LastRecordedOccupancy;

52 unsigned MaxVGPRs;

53 unsigned MaxSGPRs;

54

55public:

56 SIFormMemoryClausesImpl(LiveIntervals *LS) : LIS(LS) {}

57 bool run(MachineFunction &MF);

58};

59

61public:

62 static char ID;

63

64 SIFormMemoryClausesLegacy() : MachineFunctionPass(ID) {

66 }

67

68 bool runOnMachineFunction(MachineFunction &MF) override;

69

70 StringRef getPassName() const override {

71 return "SI Form memory clauses";

72 }

73

74 void getAnalysisUsage(AnalysisUsage &AU) const override {

75 AU.addRequired();

78 }

79

80 MachineFunctionProperties getClearedProperties() const override {

81 return MachineFunctionProperties().setIsSSA();

82 }

83};

84

85}

86

88 "SI Form memory clauses", false, false)

92

93char SIFormMemoryClausesLegacy::ID = 0;

94

96

98 return new SIFormMemoryClausesLegacy();

99}

100

104

108

109

110

112 assert(MI.isDebugInstr() && "debug instructions should not reach here");

113 if (MI.isBundled())

114 return false;

115 if (MI.mayLoad() || MI.mayStore())

116 return false;

118 return false;

120 return false;

122 return false;

123

125 Register ResReg = ResMO.getReg();

127 if (MO.getReg() == ResReg)

128 return false;

129 }

130 break;

131 }

132 return true;

133}

134

136 unsigned S = 0;

149 return S;

150}

151

152

153

154bool SIFormMemoryClausesImpl::canBundle(const MachineInstr &MI,

155 const RegUse &Defs,

156 const RegUse &Uses) const {

157

158 for (const MachineOperand &MO : MI.operands()) {

159

160

161 if (MO.isFI())

162 return false;

163

164 if (!MO.isReg())

165 continue;

166

168

169

170 if (MO.isTied())

171 return false;

172

173 const RegUse &Map = MO.isDef() ? Uses : Defs;

174 auto Conflict = Map.find(Reg);

175 if (Conflict == Map.end())

176 continue;

177

179 return false;

180

181 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());

182 if ((Conflict->second.second & Mask).any())

183 return false;

184 }

185

186 return true;

187}

188

189

190

191

192bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,

193 GCNDownwardRPTracker &RPT) {

194

195

196

197

200 unsigned Occupancy = MaxPressure.getOccupancy(

201 *ST,

202 MI.getMF()->getInfo()->getDynamicVGPRBlockSize());

203

204

205

206

207

208

209

210

211

212

215 MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {

216 LastRecordedOccupancy = Occupancy;

217 return true;

218 }

219 return false;

220}

221

222

223void SIFormMemoryClausesImpl::collectRegUses(const MachineInstr &MI,

224 RegUse &Defs, RegUse &Uses) const {

225 for (const MachineOperand &MO : MI.operands()) {

226 if (!MO.isReg())

227 continue;

229 if (Reg)

230 continue;

231

233 ? TRI->getSubRegIndexLaneMask(MO.getSubReg())

235 RegUse &Map = MO.isDef() ? Defs : Uses;

236

238 auto [Loc, Inserted] = Map.try_emplace(Reg, State, Mask);

239 if (!Inserted) {

240 Loc->second.first |= State;

241 Loc->second.second |= Mask;

242 }

243 }

244}

245

246

247

248

249bool SIFormMemoryClausesImpl::processRegUses(const MachineInstr &MI,

250 RegUse &Defs, RegUse &Uses,

251 GCNDownwardRPTracker &RPT) {

252 if (!canBundle(MI, Defs, Uses))

253 return false;

254

255 if (!checkPressure(MI, RPT))

256 return false;

257

258 collectRegUses(MI, Defs, Uses);

259 return true;

260}

261

262bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {

265 return false;

266

270 MFI = MF.getInfo();

273

274 MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();

275 MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();

277 "amdgpu-max-memory-clause", MaxClause);

278

279 for (MachineBasicBlock &MBB : MF) {

280 GCNDownwardRPTracker RPT(*LIS);

283 MachineInstr &MI = *I;

284 Next = std::next(I);

285

286 if (MI.isMetaInstruction())

287 continue;

288

290

292 continue;

293

296 else {

299 }

300

302 RegUse Defs, Uses;

303 if (!processRegUses(MI, Defs, Uses, RPT)) {

304 RPT.reset(MI, &LiveRegsCopy);

305 continue;

306 }

307

311

312 if (Next->isMetaInstruction())

313 continue;

314

316 break;

317

318

319

320

321 if (!processRegUses(*Next, Defs, Uses, RPT))

322 break;

323

324 LastClauseInst = Next;

326 }

328 RPT.reset(MI, &LiveRegsCopy);

329 continue;

330 }

331

334

335 assert(!LastClauseInst->isMetaInstruction());

336

338 SlotIndex ClauseLiveOutIdx =

340

341

342 MachineInstrBuilder Kill;

343

344

345

346 for (auto &&R : Uses) {

349 continue;

350

351

353 const LiveInterval &LI = LIS->getInterval(R.first);

354

356 if (!LI.liveAt(ClauseLiveOutIdx)) {

358 AMDGPU::NoSubRegister);

359 }

360 } else {

361 LaneBitmask KilledMask;

362 for (const LiveInterval::SubRange &SR : LI.subranges()) {

363 if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx))

364 KilledMask |= SR.LaneMask;

365 }

366

367 if (KilledMask.none())

368 continue;

369

370 SmallVector KilledIndexes;

371 bool Success = TRI->getCoveringSubRegIndexes(

372 MRI->getRegClass(Reg), KilledMask, KilledIndexes);

374 assert(Success && "Failed to find subregister mask to cover lanes");

375 for (unsigned SubReg : KilledIndexes) {

377 }

378 }

379

380 if (KillOps.empty())

381 continue;

382

383

384

385

386

387

388 Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst),

390 for (auto &Op : KillOps)

391 Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op));

392 Ind->insertMachineInstrInMaps(*Kill);

393 }

394

395

396 RPT.reset(MI, &LiveRegsCopy);

397

398 if (!Kill)

399 continue;

400

401 for (auto &&R : Defs) {

405 continue;

408 }

409

410 for (auto &&R : Uses) {

413 continue;

416 }

417 }

418 }

419

421}

422

423bool SIFormMemoryClausesLegacy::runOnMachineFunction(MachineFunction &MF) {

425 return false;

426

427 LiveIntervals *LIS = &getAnalysis().getLIS();

428 return SIFormMemoryClausesImpl(LIS).run(MF);

429}

430

431PreservedAnalyses

435 SIFormMemoryClausesImpl(&LIS).run(MF);

437}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Remove Loads Into Fake Uses

static cl::opt< unsigned > MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15), cl::desc("Maximum length of a memory clause, instructions"))

static bool isVMEMClauseInst(const MachineInstr &MI)

Definition SIFormMemoryClauses.cpp:101

static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause)

Definition SIFormMemoryClauses.cpp:111

static unsigned getMopState(const MachineOperand &MO)

Definition SIFormMemoryClauses.cpp:135

static bool isSMEMClauseInst(const MachineInstr &MI)

Definition SIFormMemoryClauses.cpp:105

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

FunctionPass class - This class is used to implement most global optimizations.

uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const

For a string attribute Kind, parse attribute as an integer.

GCNRegPressure moveMaxPressure()

Return MaxPressure and clear it.

bool advanceBeforeNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)

Move to the state right before the next MI or after the end of MBB.

bool advance(MachineInstr *MI=nullptr, bool UseInternalIterator=true)

Move to the state at the next MI.

MachineBasicBlock::const_iterator getNext() const

bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs=nullptr)

Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.

void advanceToNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)

Move to the state at the MI, advanceBeforeNext has to be called first.

const decltype(LiveRegs) & getLiveRegs() const

DenseMap< unsigned, LaneBitmask > LiveRegSet

bool hasGFX90AInsts() const

const SIInstrInfo * getInstrInfo() const override

const SIRegisterInfo * getRegisterInfo() const override

bool isXNACKEnabled() const

bool hasSubRanges() const

Returns true if subregister liveness information is available.

iterator_range< subrange_iterator > subranges()

LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

SlotIndexes * getSlotIndexes() const

SlotIndex getInstructionIndex(const MachineInstr &Instr) const

Returns the base index of the given instruction.

LiveInterval & getInterval(Register Reg)

void removeInterval(Register Reg)

Interval removal.

LiveInterval & createAndComputeVirtRegInterval(Register Reg)

bool liveAt(SlotIndex index) const

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

instr_iterator instr_begin()

MachineInstrBundleIterator< const MachineInstr > const_iterator

Instructions::iterator instr_iterator

instr_iterator instr_end()

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

bool isValid() const

Check for null.

Representation of each machine instruction.

MachineOperand class - Representation of each machine instruction operand.

LLVM_ABI bool isRenamable() const

isRenamable - Returns true if this register may be renamed, i.e.

bool isEarlyClobber() const

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Wrapper class representing virtual and physical registers.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition SIFormMemoryClauses.cpp:432

static bool isVMEM(const MachineInstr &MI)

static bool isSMRD(const MachineInstr &MI)

static bool isAtomic(const MachineInstr &MI)

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

unsigned getMinAllowedOccupancy() const

void limitOccupancy(const MachineFunction &MF)

SlotIndex getNextIndex() const

Returns the next index.

reference emplace_back(ArgTypes &&... Args)

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

@ Renamable

Register that may be renamed.

@ Kill

The last use of a register.

@ Undef

Value of the register doesn't matter.

@ EarlyClobber

Register definition happens before uses.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

FunctionPass * createSIFormMemoryClausesLegacyPass()

Definition SIFormMemoryClauses.cpp:97

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

char & SIFormMemoryClausesID

Definition SIFormMemoryClauses.cpp:95

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

void initializeSIFormMemoryClausesLegacyPass(PassRegistry &)

FunctionAddr VTableAddr Next

DWARFExpression::Operation Op

unsigned getVGPRNum(bool UnifiedVGPRFile) const

unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize) const

unsigned getSGPRNum() const

static constexpr LaneBitmask getAll()

constexpr bool none() const