LLVM: lib/Target/AMDGPU/SIModeRegister.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

21#include

22

23#define DEBUG_TYPE "si-mode-register"

24

25STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");

26

27using namespace llvm;

28

30

31

34

36

37 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {

39 };

40

41

42

46

47

48

52

53

54

57 unsigned NewMode = (Mode & NewMask);

58 return Status(NewMask, NewMode);

59 }

60

61

65

69

71

75

77};

78

80public:

81

82

84

85

86

88

89

90

92

93

94

96

97

98

100

101

102

104

106};

107

108namespace {

109

// Implementation class shared by the pass wrappers in this file; both the
// legacy wrapper and the new-pass-manager entry point construct a temporary
// SIModeRegister and call run(MF) on it.
// NOTE(review): this listing is an extract - most member declarations between
// listing lines 114 and 138 are not visible here, and the element type of the
// unique_ptr below was stripped by the extraction.
110class SIModeRegister {

111public:

// Per-block records, indexed by MachineBasicBlock::getNumber() (see the
// BlockInfo[MBB.getNumber()] store at the end of processBlockPhase1).
112 std::vector<std::unique_ptr> BlockInfo;

// FIFO worklist of blocks waiting to be handled by processBlockPhase2.
113 std::queue<MachineBasicBlock *> Phase2List;

114

115

116

117

118

119

// Status assigned to a block's Pred in processBlockPhase2 when no usable
// predecessor exit state applies.
// NOTE(review): the initializer is not visible in this extract.
121 Status DefaultStatus =

123

125

127

129

131

133

135

138};

139

141public:

142 static char ID;

143

144 SIModeRegisterLegacy() : MachineFunctionPass(ID) {}

145

146 bool runOnMachineFunction(MachineFunction &MF) override;

147

148 void getAnalysisUsage(AnalysisUsage &AU) const override {

151 }

152};

153}

154

156 "Insert required mode register values", false, false)

157

158char SIModeRegisterLegacy::ID = 0;

159

161

163 return new SIModeRegisterLegacy();

164}

165

166

167

168

169

172 unsigned Opcode = MI.getOpcode();

173 if (TII->usesFPDPRounding(MI) ||

174 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||

175 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||

176 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||

177 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {

178 switch (Opcode) {

179 case AMDGPU::V_INTERP_P1LL_F16:

180 case AMDGPU::V_INTERP_P1LV_F16:

181 case AMDGPU::V_INTERP_P2_F16:

182

185 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {

186 unsigned Mode = MI.getOperand(2).getImm();

187 MI.removeOperand(2);

188 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));

190 }

191 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {

192 unsigned Mode = MI.getOperand(2).getImm();

193 MI.removeOperand(2);

194 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));

196 }

197 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {

198 unsigned Mode = MI.getOperand(6).getImm();

199 MI.removeOperand(6);

200 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));

202 }

203 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {

204 unsigned Mode = MI.getOperand(2).getImm();

205 MI.removeOperand(2);

206 MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));

208 }

209 default:

210 return DefaultStatus;

211 }

212 }

213 return Status();

214}

215

216

217

218

219

220

// Emits setreg instruction(s) covering every bit set in InstrMode.Mask.
// Each loop iteration handles one field of Width bits starting at Offset:
// the field's value is extracted from InstrMode.Mode, an ID_MODE hwreg operand
// is encoded for that (Offset, Width) pair, the NumSetregInserted statistic is
// incremented, and the field's bits are cleared from the working mask.
// InstrMode is taken by value, so the caller's Status is not modified.
// NOTE(review): listing lines 224-225, 228-229 and 232 are missing from this
// extract, so the definitions of Offset and Width and the start of the
// instruction-building statement (which presumably uses MBB, MI, TII and
// Value) are not visible here.
221void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,

222 const SIInstrInfo *TII, Status InstrMode) {

// Keep going until every requested mask bit has been covered.
223 while (InstrMode.Mask) {

// Value of the current field, shifted down to bit 0 and truncated to Width bits.
226 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);

227 using namespace AMDGPU::Hwreg;

230 .addImm(HwregEncoding::encode(ID_MODE, Offset, Width));

231 ++NumSetregInserted;

// Drop the bits just handled so the loop advances to the next field.
233 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);

234 }

235}

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

// Phase 1: walk MBB once, accumulating in NewInfo->Change the mode state
// produced by the block's instructions, and store the finished record in
// BlockInfo[MBB.getNumber()].
// Explicit S_SETREG* instructions that target the MODE hwreg are folded into
// the tracked state; for every other instruction the requirement comes from
// getInstructionMode(). Setregs needed inside the block are emitted through
// insertSetreg() at the tracked InsertionPoint; the first one may instead be
// deferred by recording FirstInsertionPoint / Require in the block record.
// NOTE(review): this listing is an extract - listing lines 282 and 311 (among
// others) are missing, so the definition of Mask and the opening of the scope
// that is closed at listing line 326 are not visible here.
256void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,

257 const SIInstrInfo *TII) {

258 auto NewInfo = std::make_unique();

// Instruction before which a setreg may still need to be inserted; null when
// no insertion is pending.
259 MachineInstr *InsertionPoint = nullptr;

260

261

262

263

264

// True until the block's initial requirement has been recorded, or until an
// explicit immediate setreg of the mode register is seen (both clear it below).
265 bool RequirePending = true;

// Snapshot of NewInfo->Change associated with the current InsertionPoint; the
// setreg to insert is computed as IPChange.delta(NewInfo->Change).
266 Status IPChange;

267 for (MachineInstr &MI : MBB) {

268 Status InstrMode = getInstructionMode(MI, TII);

// Case 1: an explicit setreg instruction already present in the block.
269 if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||

270 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||

271 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||

272 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {

273

274

275

// Decode the hwreg operand; setregs of registers other than MODE are skipped.
276 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();

277 using namespace AMDGPU::Hwreg;

278 auto [Id, Offset, Width] = HwregEncoding::decode(Dst);

279 if (Id != ID_MODE)

280 continue;

281

283

284

// Flush any pending insertion point before accounting for this setreg.
285 if (InsertionPoint) {

286 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));

287 InsertionPoint = nullptr;

288 }

289

290

291

// Immediate forms: the written value is known, so merge it as a definite
// Status. Non-immediate forms: the bits covered by Mask are merged as unknown.
292 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||

293 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {

294 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();

295 unsigned Mode = (Val << Offset) & Mask;

296 Status Setreg = Status(Mask, Mode);

297

298

299 RequirePending = false;

300 NewInfo->Change = NewInfo->Change.merge(Setreg);

301 } else {

302 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);

303 }

// Case 2: the instruction's requirement is not compatible with the state
// accumulated so far.
304 } else if (!NewInfo->Change.isCompatible(InstrMode)) {

305

306

307 if (InsertionPoint) {

308

309

310

// NOTE(review): listing line 311 (not visible) opens the scope that contains
// the following if/else and the InsertionPoint update.
312 if (RequirePending) {

313

314

315

// First requirement of the block: record where and what, without inserting
// a setreg here.
316 NewInfo->FirstInsertionPoint = InsertionPoint;

317 NewInfo->Require = NewInfo->Change;

318 RequirePending = false;

319 } else {

// Emit the accumulated change at the current insertion point and start a new
// accumulation from the present state.
320 insertSetreg(MBB, InsertionPoint, TII,

321 IPChange.delta(NewInfo->Change));

322 IPChange = NewInfo->Change;

323 }

324

325 InsertionPoint = &MI;

326 }

327 NewInfo->Change = NewInfo->Change.merge(InstrMode);

328 } else {

329

330

// No insertion point is pending: this instruction becomes the insertion point
// and the state before it is remembered in IPChange.
331 InsertionPoint = &MI;

332 IPChange = NewInfo->Change;

333 NewInfo->Change = NewInfo->Change.merge(InstrMode);

334 }

335 }

336 }

// End of block: either record the still-pending initial requirement in the
// block record, or flush the last pending insertion point.
337 if (RequirePending) {

338

339

340 NewInfo->FirstInsertionPoint = InsertionPoint;

341 NewInfo->Require = NewInfo->Change;

342 } else if (InsertionPoint) {

343

344 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));

345 }

// Exit starts out equal to the block's own Change; processBlockPhase2 later
// overwrites it with Pred merged with Change.
346 NewInfo->Exit = NewInfo->Change;

347 BlockInfo[MBB.getNumber()] = std::move(NewInfo);

348}

349

350

351

352

353

// Phase 2: compute the status on entry to MBB (its Pred field) from the Exit
// statuses of its predecessors, recompute this block's Exit as Pred merged
// with Change, and push blocks onto Phase2List when they need (re)processing.
// TII is not referenced by any of the lines visible here.
// NOTE(review): this listing is an extract - listing lines 358-359, 374 and
// 407 are missing, so the definitions of ThisBlock, P, E and Succ, the
// condition of the first branch, and the header of the loop that pushes Succ
// are not visible here.
354void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,

355 const SIInstrInfo *TII) {

// Set when at least one predecessor whose exit state is not yet available
// (and which is not this block itself) was seen.
356 bool RevisitRequired = false;

// Local flag, stored into the block record at the end of the function.
357 bool ExitSet = false;

360

// NOTE(review): the condition guarding this branch is not visible in this
// extract; in it the entry state is simply DefaultStatus.
361 BlockInfo[ThisBlock]->Pred = DefaultStatus;

362 ExitSet = true;

363 } else {

364

365

366

367

368

369

370

371

372

373

// Predecessor referenced by P is handled separately from the rest
// (P presumably starts at the first predecessor - TODO confirm).
375 MachineBasicBlock &PB = *(*P);

376 unsigned PredBlock = PB.getNumber();

// This block is its own predecessor and there is no other predecessor after
// it: use DefaultStatus.
377 if ((ThisBlock == PredBlock) && (std::next(P) == E)) {

378 BlockInfo[ThisBlock]->Pred = DefaultStatus;

379 ExitSet = true;

380 } else if (BlockInfo[PredBlock]->ExitSet) {

381 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;

382 ExitSet = true;

383 } else if (PredBlock != ThisBlock)

384 RevisitRequired = true;

385

// Remaining predecessors: narrow Pred by intersecting with each available
// predecessor Exit, or adopt that Exit if this block's stored ExitSet flag is
// not set.
386 for (P = std::next(P); P != E; P = std::next(P)) {

387 MachineBasicBlock *Pred = *P;

388 unsigned PredBlock = Pred->getNumber();

389 if (BlockInfo[PredBlock]->ExitSet) {

390 if (BlockInfo[ThisBlock]->ExitSet) {

391 BlockInfo[ThisBlock]->Pred =

392 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);

393 } else {

394 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;

395 }

396 ExitSet = true;

397 } else if (PredBlock != ThisBlock)

398 RevisitRequired = true;

399 }

400 }

// New exit state for this block: the entry state with the block's own
// changes merged in.
401 Status TmpStatus =

402 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);

// Only when the exit state actually changed is it stored and are blocks
// re-queued.
403 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {

404 BlockInfo[ThisBlock]->Exit = TmpStatus;

405

406

// NOTE(review): the loop header that defines Succ (listing line 407) is not
// visible in this extract.
408 Phase2List.push(Succ);

409 }

410 BlockInfo[ThisBlock]->ExitSet = ExitSet;

// Re-queue this block if some predecessor's exit state was not yet available.
411 if (RevisitRequired)

412 Phase2List.push(&MBB);

413}

414

415

416

417

// Phase 3: if the entry status computed for the block (Pred) is not compatible
// with the block's recorded initial requirement (Require), insert setreg(s)
// for the difference, Pred.delta(Require). When the block record has a
// FirstInsertionPoint, the insertion happens there.
// NOTE(review): this listing is an extract - listing line 420 (presumably the
// definition of ThisBlock) and listing line 427 (the statement of the `else`
// branch) are not visible here.
418void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,

419 const SIInstrInfo *TII) {

421 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {

422 Status Delta =

423 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);

424 if (BlockInfo[ThisBlock]->FirstInsertionPoint)

425 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);

426 else

428 }

429}

430

// Legacy pass entry point: constructs a temporary SIModeRegister, runs it on
// MF and returns the result of run(MF) unchanged.
431bool SIModeRegisterLegacy::runOnMachineFunction(MachineFunction &MF) {

432 return SIModeRegister().run(MF);

433}

434

437 if (!SIModeRegister().run(MF))

441 return PA;

442}

443

445

446

447

448

449

451 if (F.hasFnAttribute(llvm::Attribute::StrictFP))

456

457

458

459

460

462 processBlockPhase1(BB, TII);

463

464

465

466

468 Phase2List.push(&BB);

469 while (!Phase2List.empty()) {

470 processBlockPhase2(*Phase2List.front(), TII);

471 Phase2List.pop();

472 }

473

474

475

477 processBlockPhase3(BB, TII);

478

479 BlockInfo.clear();

480

482}

const TargetInstrInfo & TII

Provides AMDGPU specific target descriptions.

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

AMD GCN specific subclass of TargetSubtarget.

PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))

#define FP_ROUND_MODE_DP(x)

#define FP_ROUND_ROUND_TO_NEAREST

#define FP_ROUND_ROUND_TO_ZERO

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.

#define STATISTIC(VARNAME, DESC)

Status Change

Definition SIModeRegister.cpp:87

Status Pred

Definition SIModeRegister.cpp:95

MachineInstr * FirstInsertionPoint

Definition SIModeRegister.cpp:99

Status Exit

Definition SIModeRegister.cpp:91

Status Require

Definition SIModeRegister.cpp:83

bool ExitSet

Definition SIModeRegister.cpp:103

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

Represents analyses that only rely on functions' control flow.

FunctionPass class - This class is used to implement most global optimizations.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.

SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator

pred_iterator pred_begin()

iterator_range< succ_iterator > successors()

MachineInstr & instr_front()

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

unsigned getNumBlockIDs() const

getNumBlockIDs - Return the number of MBB ID's allocated.

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

Representation of each machine instruction.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)

Definition SIModeRegister.cpp:435

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

FunctionPass * createSIModeRegisterPass()

Definition SIModeRegister.cpp:162

int countr_one(T Value)

Count the number of ones from the least significant bit to the first zero bit.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

constexpr T maskTrailingOnes(unsigned N)

Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.

Definition SIModeRegister.cpp:29

Status delta(const Status &S) const

Definition SIModeRegister.cpp:62

Status(unsigned NewMask, unsigned NewMode)

Definition SIModeRegister.cpp:37

bool isCombinable(Status &S)

Definition SIModeRegister.cpp:76

bool operator==(const Status &S) const

Definition SIModeRegister.cpp:66

bool isCompatible(Status &S)

Definition SIModeRegister.cpp:72

Status merge(const Status &S) const

Definition SIModeRegister.cpp:43

Status intersect(const Status &S) const

Definition SIModeRegister.cpp:55

bool operator!=(const Status &S) const

Definition SIModeRegister.cpp:70

unsigned Mask

Definition SIModeRegister.cpp:32

unsigned Mode

Definition SIModeRegister.cpp:33

Status mergeUnknown(unsigned newMask)

Definition SIModeRegister.cpp:49