LLVM: lib/Target/AMDGPU/GCNNSAReassign.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

27

28using namespace llvm;

29

30#define DEBUG_TYPE "amdgpu-nsa-reassign"

31

33 "Number of NSA instructions with non-sequential address found");

35 "Number of NSA instructions changed to sequential");

36

37namespace {

38class GCNNSAReassignImpl {

39public:

41 : VRM(VM), LRM(LM), LIS(LS) {}

42

43 bool run(MachineFunction &MF);

44

45private:

46 enum NSA_Status {

47 NOT_NSA,

48 FIXED,

49 NON_CONTIGUOUS,

50

51 CONTIGUOUS

52 };

53

54 const GCNSubtarget *ST;

55

56 const MachineRegisterInfo *MRI;

57

58 const SIRegisterInfo *TRI;

59

60 VirtRegMap *VRM;

61

62 LiveRegMatrix *LRM;

63

64 LiveIntervals *LIS;

65

66 unsigned MaxNumVGPRs;

67

69

70 NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

71

72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,

73 unsigned StartReg) const;

74

75 bool canAssign(unsigned StartReg, unsigned NumRegs) const;

76

77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;

78};

79

81public:

82 static char ID;

83

84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {

86 }

87

88 bool runOnMachineFunction(MachineFunction &MF) override;

89

90 StringRef getPassName() const override { return "GCN NSA Reassign"; };

91

92 void getAnalysisUsage(AnalysisUsage &AU) const override {

93 AU.addRequired();

94 AU.addRequired();

95 AU.addRequired();

98 }

99};

100

101}

102

104 false, false)

110

111char GCNNSAReassignLegacy::ID = 0;

112

114

115bool GCNNSAReassignImpl::tryAssignRegisters(

117 unsigned NumRegs = Intervals.size();

118

119 for (unsigned N = 0; N < NumRegs; ++N)

120 if (VRM->hasPhys(Intervals[N]->reg()))

121 LRM->unassign(*Intervals[N]);

122

123 for (unsigned N = 0; N < NumRegs; ++N)

124 if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))

125 return false;

126

127 for (unsigned N = 0; N < NumRegs; ++N)

129

130 return true;

131}

132

133bool GCNNSAReassignImpl::canAssign(unsigned StartReg, unsigned NumRegs) const {

134 for (unsigned N = 0; N < NumRegs; ++N) {

135 unsigned Reg = StartReg + N;

136 if (MRI->isAllocatable(Reg))

137 return false;

138

139 for (unsigned I = 0; CSRegs[I]; ++I)

140 if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&

142 return false;

143 }

144

145 return true;

146}

147

148bool GCNNSAReassignImpl::scavengeRegs(

149 SmallVectorImpl<LiveInterval *> &Intervals) const {

150 unsigned NumRegs = Intervals.size();

151

152 if (NumRegs > MaxNumVGPRs)

153 return false;

154 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

155

156 for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {

157 if (!canAssign(Reg, NumRegs))

158 continue;

159

160 if (tryAssignRegisters(Intervals, Reg))

161 return true;

162 }

163

164 return false;

165}

166

167GCNNSAReassignImpl::NSA_Status

168GCNNSAReassignImpl::CheckNSA(const MachineInstr &MI, bool Fast) const {

171 return NSA_Status::NOT_NSA;

172

173 switch (Info->MIMGEncoding) {

174 case AMDGPU::MIMGEncGfx10NSA:

175 case AMDGPU::MIMGEncGfx11NSA:

176 break;

177 default:

178 return NSA_Status::NOT_NSA;

179 }

180

181 int VAddr0Idx =

182 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

183

184 unsigned VgprBase = 0;

185 bool NSA = false;

186 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {

187 const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);

190 return NSA_Status::FIXED;

191

193

195 if (!PhysReg)

196 return NSA_Status::FIXED;

197

198

199

200

201

202

203

204

205

206

207 if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())

208 return NSA_Status::FIXED;

209

210

211

212

213

214

216 return NSA_Status::FIXED;

217

218 const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

219

220 if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)

221 return NSA_Status::FIXED;

222

223 for (auto U : MRI->use_nodbg_operands(Reg)) {

224 if (U.isImplicit())

225 return NSA_Status::FIXED;

226 const MachineInstr *UseInst = U.getParent();

228 return NSA_Status::FIXED;

229 }

230

232 return NSA_Status::FIXED;

233 }

234

235 if (I == 0)

236 VgprBase = PhysReg;

237 else if (VgprBase + I != PhysReg)

238 NSA = true;

239 }

240

241 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;

242}

243

244bool GCNNSAReassignImpl::run(MachineFunction &MF) {

247 return false;

248

251

252 const SIMachineFunctionInfo *MFI = MF.getInfo();

254 MaxNumVGPRs = std::min(

256 MaxNumVGPRs);

257 CSRegs = MRI->getCalleeSavedRegs();

258

259 using Candidate = std::pair<const MachineInstr*, bool>;

261 for (const MachineBasicBlock &MBB : MF) {

262 for (const MachineInstr &MI : MBB) {

263 switch (CheckNSA(MI)) {

264 default:

265 continue;

266 case NSA_Status::CONTIGUOUS:

267 Candidates.push_back(std::pair(&MI, true));

268 break;

269 case NSA_Status::NON_CONTIGUOUS:

270 Candidates.push_back(std::pair(&MI, false));

271 ++NumNSAInstructions;

272 break;

273 }

274 }

275 }

276

278 for (auto &C : Candidates) {

279 if (C.second)

280 continue;

281

282 const MachineInstr *MI = C.first;

283 if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {

284

285 C.second = true;

286 ++NumNSAConverted;

287 continue;

288 }

289

291 int VAddr0Idx =

292 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

293

296 SlotIndex MinInd, MaxInd;

297 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {

298 const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);

302

303 Intervals.clear();

304 break;

305 }

308 if (LI->empty()) {

309

310

311 if (I == 0)

313 continue;

314 }

315 MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();

316 MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();

317 }

318

319 if (Intervals.empty())

320 continue;

321

323 << "\tOriginal allocation:\t";

324 for (auto *LI

325 : Intervals) dbgs()

327 dbgs() << '\n');

328

329 bool Success = scavengeRegs(Intervals);

332 if (VRM->hasPhys(Intervals.back()->reg()))

333 continue;

334 } else {

335

336 auto *I =

337 std::lower_bound(Candidates.begin(), &C, MinInd,

338 [this](const Candidate &C, SlotIndex I) {

339 return LIS->getInstructionIndex(*C.first) < I;

340 });

341 for (auto *E = Candidates.end();

343 ++I) {

344 if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {

346 LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);

347 }

348 }

349 }

350

352 for (unsigned I = 0; I < Info->VAddrOperands; ++I)

353 if (VRM->hasPhys(Intervals[I]->reg()))

355

356 for (unsigned I = 0; I < Info->VAddrOperands; ++I)

357 LRM->assign(*Intervals[I], OrigRegs[I]);

358

359 continue;

360 }

361

362 C.second = true;

363 ++NumNSAConverted;

365 dbgs() << "\tNew allocation:\t\t ["

367 << " : "

369 << "]\n");

371 }

372

374}

375

376bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {

377 auto *VRM = &getAnalysis().getVRM();

378 auto *LRM = &getAnalysis().getLRM();

379 auto *LIS = &getAnalysis().getLIS();

380

381 GCNNSAReassignImpl Impl(VRM, LRM, LIS);

382 return Impl.run(MF);

383}

384

385PreservedAnalyses

391

392 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);

393 Impl.run(MF);

395}

unsigned const MachineRegisterInfo * MRI

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Analysis containing CSE Info

AMD GCN specific subclass of TargetSubtarget.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Interface definition for SIRegisterInfo.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition GCNNSAReassign.cpp:386

bool hasNonNSAEncoding() const

const SIRegisterInfo * getRegisterInfo() const override

unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const

bool hasNSAEncoding() const

LiveInterval - This class represents the liveness of a register, or stack slot.

bool hasInterval(Register Reg) const

SlotIndex getInstructionIndex(const MachineInstr &Instr) const

Returns the base index of the given instruction.

LiveInterval & getInterval(Register Reg)

SlotIndex beginIndex() const

beginIndex - Return the lowest numbered slot covered.

SlotIndex endIndex() const

endIndex - Return the maximum point of the range of the whole, exclusive.

void unassign(const LiveInterval &VirtReg)

Unassign VirtReg from its PhysReg.

bool isPhysRegUsed(MCRegister PhysReg) const

Returns true if the given PhysReg has any live intervals assigned.

void assign(const LiveInterval &VirtReg, MCRegister PhysReg)

Assign VirtReg to PhysReg.

static MCRegister from(unsigned Val)

Check the provided unsigned value is a valid MCRegister.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineOperand & getOperand(unsigned i) const

Register getReg() const

getReg - Returns the register number.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

unsigned getOccupancy() const

unsigned getDynamicVGPRBlockSize() const

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

Register getPreSplitReg(Register virtReg) const

returns the live interval virtReg is split from.

MCRegister getPhys(Register virtReg) const

returns the physical register mapped to the specified virtual register

bool hasPhys(Register virtReg) const

returns true if the specified virtual register is mapped to a physical register

bool isAssignedReg(Register virtReg) const

returns true if the specified virtual register is not mapped to a stack slot or rematerialized.

LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ Fast

Attempts to make calls as fast as possible (e.g.

@ C

The default llvm calling convention, compatible with C.

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

char & GCNNSAReassignID

Definition GCNNSAReassign.cpp:113

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

DWARFExpression::Operation Op

void initializeGCNNSAReassignLegacyPass(PassRegistry &)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.