LLVM: lib/Target/AMDGPU/SIModeRegister.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

21#include

22

// Debug-type tag for this file.
// NOTE(review): presumably the name used to filter this pass's debug output
// and to group its statistics — confirm against LLVM's Debug/Statistic docs.
23#define DEBUG_TYPE "si-mode-register"

24

// Statistic counter. It is incremented once for every S_SETREG_IMM32_B32
// instruction built by the setreg emission loop further down in this file.
25STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");

26

// Names from the llvm namespace are used unqualified in the rest of the file.
27using namespace llvm;

28

30

31

34

36

37 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {

39 };

40

41

42

45 }

46

47

48

51 }

52

53

54

57 unsigned NewMode = (Mode & NewMask);

58 return Status(NewMask, NewMode);

59 }

60

61

64 }

65

68 }

69

71

74 }

75

77};

78

80public:

81

82

84

85

86

88

89

90

92

93

94

96

97

98

100

101

102

104

106};

107

// Anonymous namespace: the declarations inside are local to this file.
108namespace {

109

// Members of the SIModeRegister pass class. The class header line is missing
// from this listing; the class name is taken from the out-of-class
// definitions `SIModeRegister::ID` and `SIModeRegister::runOnMachineFunction`.
111public:

// Pass identifier; its definition (initialised to 0) follows the namespace.
112 static char ID;

113

// Per-block records, indexed by MachineBasicBlock::getNumber()
// (`BlockInfo[MBB.getNumber()] = ...`). The vector is cleared at the end of
// runOnMachineFunction.
// NOTE(review): the unique_ptr element type was lost in extraction — restore
// it from the upstream source rather than guessing.
114 std::vector<std::unique_ptr> BlockInfo;

// Work queue of blocks awaiting phase-2 processing. runOnMachineFunction
// pushes blocks and drains the queue; the phase-2 code pushes further blocks
// (`Phase2List.push(Succ)` and `Phase2List.push(&MBB)`).
115 std::queue<MachineBasicBlock *> Phase2List;

116

117

118

119

120

121

// Status value that phase 2 assigns to a block's `Pred` in some cases and that
// the opcode switch returns from its `default:` case. The initializer
// expression is on a line that is missing from this listing.
123 Status DefaultStatus =

125

// Set to true each time a setreg instruction is inserted; this is the value
// returned by runOnMachineFunction.
126 bool Changed = false;

127

128public:

130

132

136 }

137

139

141

143

145

148};

149}

150

152 "Insert required mode register values", false, false)

153

154char SIModeRegister::ID = 0;

155

157

159

160

161

162

163

166 unsigned Opcode = MI.getOpcode();

167 if (TII->usesFPDPRounding(MI) ||

168 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||

169 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {

170 switch (Opcode) {

171 case AMDGPU::V_INTERP_P1LL_F16:

172 case AMDGPU::V_INTERP_P1LV_F16:

173 case AMDGPU::V_INTERP_P2_F16:

174

177 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {

178 unsigned Mode = MI.getOperand(2).getImm();

179 MI.removeOperand(2);

180

181 if (TII->getSubtarget().hasTrue16BitInsts()) {

184 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));

186 MI.removeOperand(1);

187 B.addImm(0);

188 B.add(Src0);

189 B.addImm(0);

190 B.addImm(0);

191 } else

192 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));

194 }

195 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {

196 unsigned Mode = MI.getOperand(2).getImm();

197 MI.removeOperand(2);

198 MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));

200 }

201 default:

202 return DefaultStatus;

203 }

204 }

206}

207

208

209

210

211

212

// Emit one S_SETREG_IMM32_B32 per contiguous run of set bits in
// InstrMode.Mask, lowest run first, until no mask bits remain.
// (The function header above this loop is missing from this listing.)
215 while (InstrMode.Mask) {

// Bit position of the lowest bit still set in the mask.
216 unsigned Offset = llvm::countr_zero(InstrMode.Mask);

// Length of the run of consecutive one bits that starts at Offset.
217 unsigned Width = llvm::countr_one(InstrMode.Mask >> Offset);

// Mode bits belonging to this run, shifted down to bit 0.
// NOTE(review): `1 << Width` shifts a signed int, so it is only well-defined
// for small Width — confirm Mask can never contain a run of 31 or more bits.
218 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);

219 using namespace AMDGPU::Hwreg;

// Build the setreg at position MI in MBB. The hwreg operand encodes the MODE
// register id together with this run's Offset and Width.
// NOTE(review): listing line 221 is missing here; presumably it adds Value as
// the immediate operand — confirm against the upstream source.
220 BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))

222 .addImm(HwregEncoding::encode(ID_MODE, Offset, Width));

// Bookkeeping: bump the statistic and record that the function was modified.
223 ++NumSetregInserted;

224 Changed = true;

// Clear the run just handled so the next iteration sees the next run (or the
// loop ends once the mask is empty).
225 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);

226 }

227}

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

250 auto NewInfo = std::make_unique();

252

253

254

255

256

257 bool RequirePending = true;

260 Status InstrMode = getInstructionMode(MI, TII);

261 if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||

262 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||

263 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||

264 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {

265

266

267

268 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();

269 using namespace AMDGPU::Hwreg;

270 auto [Id, Offset, Width] = HwregEncoding::decode(Dst);

271 if (Id != ID_MODE)

272 continue;

273

274 unsigned Mask = maskTrailingOnes(Width) << Offset;

275

276

280 }

281

282

283

284 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||

285 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {

286 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();

287 unsigned Mode = (Val << Offset) & Mask;

289

290

291 RequirePending = false;

292 NewInfo->Change = NewInfo->Change.merge(Setreg);

293 } else {

294 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);

295 }

296 } else if (!NewInfo->Change.isCompatible(InstrMode)) {

297

298

300

301

302

304 if (RequirePending) {

305

306

307

309 NewInfo->Require = NewInfo->Change;

310 RequirePending = false;

311 } else {

313 IPChange.delta(NewInfo->Change));

314 IPChange = NewInfo->Change;

315 }

316

318 }

319 NewInfo->Change = NewInfo->Change.merge(InstrMode);

320 } else {

321

322

324 IPChange = NewInfo->Change;

325 NewInfo->Change = NewInfo->Change.merge(InstrMode);

326 }

327 }

328 }

329 if (RequirePending) {

330

331

333 NewInfo->Require = NewInfo->Change;

335

337 }

338 NewInfo->Exit = NewInfo->Change;

339 BlockInfo[MBB.getNumber()] = std::move(NewInfo);

340}

341

342

343

344

345

348 bool RevisitRequired = false;

349 bool ExitSet = false;

352

353 BlockInfo[ThisBlock]->Pred = DefaultStatus;

354 ExitSet = true;

355 } else {

356

357

358

359

360

361

362

363

364

365

368 unsigned PredBlock = PB.getNumber();

369 if ((ThisBlock == PredBlock) && (std::next(P) == E)) {

370 BlockInfo[ThisBlock]->Pred = DefaultStatus;

371 ExitSet = true;

372 } else if (BlockInfo[PredBlock]->ExitSet) {

373 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;

374 ExitSet = true;

375 } else if (PredBlock != ThisBlock)

376 RevisitRequired = true;

377

378 for (P = std::next(P); P != E; P = std::next(P)) {

380 unsigned PredBlock = Pred->getNumber();

381 if (BlockInfo[PredBlock]->ExitSet) {

382 if (BlockInfo[ThisBlock]->ExitSet) {

383 BlockInfo[ThisBlock]->Pred =

384 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);

385 } else {

386 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;

387 }

388 ExitSet = true;

389 } else if (PredBlock != ThisBlock)

390 RevisitRequired = true;

391 }

392 }

394 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);

395 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {

396 BlockInfo[ThisBlock]->Exit = TmpStatus;

397

398

400 Phase2List.push(Succ);

401 }

402 BlockInfo[ThisBlock]->ExitSet = ExitSet;

403 if (RevisitRequired)

404 Phase2List.push(&MBB);

405}

406

407

408

409

413 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {

415 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);

416 if (BlockInfo[ThisBlock]->FirstInsertionPoint)

417 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);

418 else

420 }

421}

422

// Pass entry point for one machine function. Unless the function has the
// StrictFP attribute, it calls processBlockPhase1, processBlockPhase2 and
// processBlockPhase3 in that order, clears BlockInfo and returns Changed.
// Several lines (the declarations of F, TII and BB, and the loop headers
// around the phase calls) are missing from this listing.
423bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {

424

425

426

427

428

// Functions carrying the StrictFP attribute are not processed: return the
// current value of Changed without running any phase.
430 if (F.hasFnAttribute(llvm::Attribute::StrictFP))

431 return Changed;

435

436

437

438

439

// Phase 1 on block BB.
// NOTE(review): presumably inside a per-block loop whose header line is
// missing — confirm.
441 processBlockPhase1(BB, TII);

442

443

444

445

// Phase 2: seed the work queue with BB, then process the block at the front of
// the queue until it is empty. processBlockPhase2 may itself push more blocks
// onto Phase2List, so the loop runs until no block is left to process.
447 Phase2List.push(&BB);

448 while (!Phase2List.empty()) {

449 processBlockPhase2(*Phase2List.front(), TII);

450 Phase2List.pop();

451 }

452

453

454

// Phase 3 on block BB (loop header likewise not visible).
456 processBlockPhase3(BB, TII);

457

// Drop all per-block records held in BlockInfo.
458 BlockInfo.clear();

459

// Changed is set to true by the setreg emission loop when it inserts an
// instruction.
460 return Changed;

461}

Provides AMDGPU specific target descriptions.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

AMD GCN specific subclass of TargetSubtarget.

const HexagonInstrInfo * TII

PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

#define FP_ROUND_MODE_DP(x)

#define FP_ROUND_ROUND_TO_NEAREST

#define FP_ROUND_ROUND_TO_ZERO

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

MachineInstr * FirstInsertionPoint

Represent the analysis usage information of a pass.

void setPreservesCFG()

This function should be called by the pass, iff they do not:

FunctionPass class - This class is used to implement most global optimizations.

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator

pred_iterator pred_begin()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< succ_iterator > successors()

MachineInstr & instr_front()

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

unsigned getNumBlockIDs() const

getNumBlockIDs - Return the number of MBB ID's allocated.

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

Representation of each machine instruction.

MachineOperand class - Representation of each machine instruction operand.

LLVM Value Representation.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

FunctionPass * createSIModeRegisterPass()

Status delta(const Status &S) const

Status(unsigned NewMask, unsigned NewMode)

bool isCombinable(Status &S)

bool operator==(const Status &S) const

bool isCompatible(Status &S)

Status merge(const Status &S) const

Status intersect(const Status &S) const

bool operator!=(const Status &S) const

Status mergeUnknown(unsigned newMask)