LLVM: lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

20#include

21

22using namespace llvm;

23

24#define DEBUG_TYPE "r600cf"

25

26namespace {

27

// Bookkeeping for the control-flow stack: keeps the items pushed for branches
// and loops, the number of full entries and sub-entries currently in use, and
// the largest stack size observed so far (see updateMaxStackSize()).
// NOTE(review): this listing skips some original lines. CFStack::ENTRY and a
// member named ST are used by the member definitions that follow, but neither
// declaration is visible here - confirm against the upstream file.
28struct CFStack {

// Kinds of item that can be recorded on the branch/loop stacks.
29 enum StackItem {

31 SUB_ENTRY = 1,

32 FIRST_NON_WQM_PUSH = 2,

33 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3

34 };

35

// Items pushed by pushBranch() and removed by popBranch().
37 std::vector BranchStack;

// Items pushed by pushLoop() and removed by popLoop().
38 std::vector LoopStack;

// Running maximum maintained by updateMaxStackSize().
39 unsigned MaxStackSize;

// Number of full entries currently accounted for.
40 unsigned CurrentEntries = 0;

// Number of sub-entries currently accounted for; updateMaxStackSize() counts
// them in groups of four.
41 unsigned CurrentSubEntries = 0;

42

44

46

47 unsigned getLoopDepth();

48 bool branchStackContains(CFStack::StackItem);

49 bool requiresWorkAroundForInst(unsigned Opcode);

50 unsigned getSubEntrySize(CFStack::StackItem Item);

51 void updateMaxStackSize();

52 void pushBranch(unsigned Opcode, bool isWQM = false);

53 void pushLoop();

54 void popBranch();

55 void popLoop();

56};

57

58unsigned CFStack::getLoopDepth() {

59 return LoopStack.size();

60}

61

62bool CFStack::branchStackContains(CFStack::StackItem Item) {

64}

65

// Decides whether the instruction with opcode Opcode needs a work-around,
// based on the opcode and the current state of the stack.
// NOTE(review): several original lines are absent from this listing (the
// condition guarding the early `return false` and the conditions selecting
// between the two CurrentSubEntries thresholds), so the braces below do not
// balance as shown - confirm the full logic against the upstream file.
66bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {

// CF_ALU_PUSH_BEFORE on a Cayman ISA subtarget with a loop depth greater than
// one always needs the work-around.
67 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&

68 getLoopDepth() > 1)

69 return true;

70

72 return false;

73

// Only the four opcodes listed below can require the work-around; everything
// else returns false.
74 switch(Opcode) {

75 default: return false;

76 case R600::CF_ALU_PUSH_BEFORE:

77 case R600::CF_ALU_ELSE_AFTER:

78 case R600::CF_ALU_BREAK:

79 case R600::CF_ALU_CONTINUE:

// With no sub-entries in use there is nothing to work around.
80 if (CurrentSubEntries == 0)

81 return false;

83

84

85

86

87

88

89

90

// First threshold: more than three sub-entries in use.
91 return CurrentSubEntries > 3;

92 }

94

95

96

97

98

// Second threshold: more than seven sub-entries in use.
99 return CurrentSubEntries > 7;

100 }

101}

102

// Returns the number of sub-entries that a stack item of kind Item occupies.
// Kinds not handled explicitly yield 0.
// NOTE(review): the lines that choose between the two values returned for
// FIRST_NON_WQM_PUSH (3 or 2) are absent from this listing - confirm against
// the upstream file.
103unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {

104 switch(Item) {

105 default:

106 return 0;

107 case CFStack::FIRST_NON_WQM_PUSH:

110

111

112 return 3;

113 }

114

115

116

117

118

119 return 2;

120 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:

122

123

124 return 2;

// A plain sub-entry occupies exactly one slot.
125 case CFStack::SUB_ENTRY:

126 return 1;

127 }

128}

129

130void CFStack::updateMaxStackSize() {

131 unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);

132 MaxStackSize = std::max(CurrentStackSize, MaxStackSize);

133}

134

// Records a new branch on BranchStack. The kind of item pushed depends on
// Opcode and isWQM; a full ENTRY bumps CurrentEntries, any other kind adds its
// getSubEntrySize() to CurrentSubEntries. The maximum stack size is refreshed
// afterwards.
// NOTE(review): parts of the conditions selecting FIRST_NON_WQM_PUSH and
// FIRST_NON_WQM_PUSH_W_FULL_ENTRY are absent from this listing - confirm
// against the upstream file.
135void CFStack::pushBranch(unsigned Opcode, bool isWQM) {

// Opcodes other than the two handled below keep the default full ENTRY.
136 CFStack::StackItem Item = CFStack::ENTRY;

137 switch(Opcode) {

138 case R600::CF_PUSH_EG:

139 case R600::CF_ALU_PUSH_BEFORE:

140 if (!isWQM) {

142 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))

143 Item = CFStack::FIRST_NON_WQM_PUSH;

144

145

146 else if (CurrentEntries > 0 &&

149 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))

150 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;

151 else

152 Item = CFStack::SUB_ENTRY;

153 } else

154 Item = CFStack::ENTRY;

155 break;

156 }

157 BranchStack.push_back(Item);

// Account for the space taken by the item that was just pushed.
158 if (Item == CFStack::ENTRY)

159 CurrentEntries++;

160 else

161 CurrentSubEntries += getSubEntrySize(Item);

162 updateMaxStackSize();

163}

164

165void CFStack::pushLoop() {

166 LoopStack.push_back(CFStack::ENTRY);

167 CurrentEntries++;

168 updateMaxStackSize();

169}

170

171void CFStack::popBranch() {

172 CFStack::StackItem Top = BranchStack.back();

173 if (Top == CFStack::ENTRY)

174 CurrentEntries--;

175 else

176 CurrentSubEntries-= getSubEntrySize(Top);

177 BranchStack.pop_back();

178}

179

180void CFStack::popLoop() {

181 CurrentEntries--;

182 LoopStack.pop_back();

183}

184

186private:

187 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

188

// Generation-independent names for the control-flow instructions this pass
// emits; getHWInstrDesc() translates each one into a concrete R600:: opcode.
189 enum ControlFlowInstruction {

190 CF_TC,

191 CF_VC,

192 CF_CALL_FS,

193 CF_WHILE_LOOP,

194 CF_END_LOOP,

195 CF_LOOP_BREAK,

196 CF_LOOP_CONTINUE,

197 CF_JUMP,

198 CF_ELSE,

199 CF_POP,

200 CF_END

201 };

202

205 unsigned MaxFetchInst;

207

209 switch (MI.getOpcode()) {

210 case R600::KILL:

211 case R600::RETURN:

212 return true;

213 default:

214 return false;

215 }

216 }

217

// Returns the instruction descriptor for the control-flow instruction CFI.
// Each kind maps to an *_EG opcode when isEg is true and to the *_R600 opcode
// otherwise; CF_END additionally maps to CF_END_CM on a Cayman ISA subtarget.
// Asserts that an opcode was selected.
// NOTE(review): the declaration of isEg is absent from this listing - confirm
// how it is computed against the upstream file.
218 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {

// 0 means "nothing selected"; the assert after the switch checks for it.
219 unsigned Opcode = 0;

221 switch (CFI) {

222 case CF_TC:

223 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;

224 break;

225 case CF_VC:

226 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;

227 break;

228 case CF_CALL_FS:

229 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;

230 break;

231 case CF_WHILE_LOOP:

232 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;

233 break;

234 case CF_END_LOOP:

235 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;

236 break;

237 case CF_LOOP_BREAK:

238 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;

239 break;

240 case CF_LOOP_CONTINUE:

241 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;

242 break;

243 case CF_JUMP:

244 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;

245 break;

246 case CF_ELSE:

247 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;

248 break;

249 case CF_POP:

250 Opcode = isEg ? R600::POP_EG : R600::POP_R600;

251 break;

252 case CF_END:

// Cayman has its own CF_END opcode, checked before the EG/R600 choice.
253 if (ST->hasCaymanISA()) {

254 Opcode = R600::CF_END_CM;

255 break;

256 }

257 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;

258 break;

259 }

260 assert (Opcode && "No opcode selected");

261 return TII->get(Opcode);

262 }

263

265 std::set &DstRegs) const {

266 unsigned DstMI, SrcMI;

268 E = MI.operands_end();

269 I != E; ++I) {

272 continue;

273 if (MO.isDef()) {

275 if (R600::R600_Reg128RegClass.contains(Reg))

276 DstMI = Reg;

277 else

278 DstMI = TRI->getMatchingSuperReg(Reg,

280 &R600::R600_Reg128RegClass);

281 }

282 if (MO.isUse()) {

284 if (R600::R600_Reg128RegClass.contains(Reg))

285 SrcMI = Reg;

286 else

287 SrcMI = TRI->getMatchingSuperReg(Reg,

289 &R600::R600_Reg128RegClass);

290 }

291 }

292 if ((DstRegs.find(SrcMI) == DstRegs.end())) {

293 DstRegs.insert(DstMI);

294 return true;

295 }

296 return false;

297 }

298

299 ClauseFile

301 const {

303 std::vector<MachineInstr *> ClauseContent;

304 unsigned AluInstCount = 0;

305 bool IsTex = TII->usesTextureCache(*ClauseHead);

306 std::set DstRegs;

308 if (IsTrivialInst(*I))

309 continue;

310 if (AluInstCount >= MaxFetchInst)

311 break;

312 if ((IsTex && TII->usesTextureCache(*I)) ||

313 (!IsTex && TII->usesVertexCache(*I)))

314 break;

315 if (!isCompatibleWithClause(*I, DstRegs))

316 break;

317 AluInstCount ++;

318 ClauseContent.push_back(&*I);

319 }

321 getHWInstrDesc(IsTex?CF_TC:CF_VC))

323 .addImm(AluInstCount - 1);

324 return ClauseFile(MIb, std::move(ClauseContent));

325 }

326

// Assigns a literal register to every source of MI that currently reads
// R600::ALU_LITERAL_X, and collects the corresponding operands in Lits.
// A source whose literal is already in Lits reuses that slot's register
// (LiteralRegs[index]); otherwise the next free register is used and the
// operand is appended. At most four literals are allowed (asserted).
// NOTE(review): the declarations of Srcs and Operand and the search call that
// initialises It are absent from this listing - confirm against the upstream
// file.
327 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {

// Literal registers, indexed by position in Lits.
328 static const unsigned LiteralRegs[] = {

329 R600::ALU_LITERAL_X,

330 R600::ALU_LITERAL_Y,

331 R600::ALU_LITERAL_Z,

332 R600::ALU_LITERAL_W

333 };

335 TII->getSrcs(MI);

336 for (const auto &Src:Srcs) {

// Only sources that read the literal placeholder register are rewritten.
337 if (Src.first->getReg() != R600::ALU_LITERAL_X)

338 continue;

339 int64_t Imm = Src.second;

340 std::vector<MachineOperand *>::iterator It =

// Predicate: an already collected immediate operand with the same value.
342 return val->isImm() && (val->getImm() == Imm);

343 });

344

345

347 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

348

349 if (It != Lits.end()) {

350

// Reuse the register of the slot that already holds this literal.
351 unsigned Index = It - Lits.begin();

352 Src.first->setReg(LiteralRegs[Index]);

353 } else {

354

// New literal: take the next free slot and remember the operand.
355 assert(Lits.size() < 4 && "Too many literals in Instruction Group");

356 Src.first->setReg(LiteralRegs[Lits.size()]);

357 Lits.push_back(&Operand);

358 }

359 }

360 }

361

364 const std::vector &Literals) const {

366 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {

367 unsigned LiteralPair0 = Literals[i];

368 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;

369 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),

370 TII->get(R600::LITERALS))

371 .addImm(LiteralPair0)

372 .addImm(LiteralPair1);

373 }

374 return InsertPos;

375 }

376

377 ClauseFile

379 const {

381 std::vector<MachineInstr *> ClauseContent;

382 I++;

384 if (IsTrivialInst(*I)) {

385 ++I;

386 continue;

387 }

388 if (I->isBundle() && TII->isALUInstr(I->getOpcode()))

389 break;

390 std::vector<MachineOperand *>Literals;

391 if (I->isBundle()) {

394 while (++BI != E && BI->isBundledWithPred()) {

395 BI->unbundleFromPred();

397 if (MO.isReg() && MO.isInternalRead())

398 MO.setIsInternalRead(false);

399 }

400 getLiteral(*BI, Literals);

401 ClauseContent.push_back(&*BI);

402 }

403 I = BI;

405 } else {

406 getLiteral(*I, Literals);

407 ClauseContent.push_back(&*I);

408 I++;

409 }

410 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {

412 TII->get(R600::LITERALS));

413 if (Literals[i]->isImm()) {

415 } else {

418 }

419 if (i + 1 < e) {

420 if (Literals[i + 1]->isImm()) {

422 } else {

425 }

426 } else

428 ClauseContent.push_back(MILit);

429 }

430 }

431 assert(ClauseContent.size() < 128 && "ALU clause is too big");

433 return ClauseFile(&ClauseHead, std::move(ClauseContent));

434 }

435

438 unsigned &CfCount) {

439 CounterPropagateAddr(*Clause.first, CfCount);

443 BB->splice(InsertPos, BB, MI);

444 CfCount += 2 * Clause.second.size();

445 }

446

448 ClauseFile &Clause, unsigned &CfCount) {

449 Clause.first->getOperand(0).setImm(0);

450 CounterPropagateAddr(*Clause.first, CfCount);

454 BB->splice(InsertPos, BB, MI);

455 CfCount += Clause.second.size();

456 }

457

458 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {

459 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());

460 }

461 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,

462 unsigned Addr) const {

464 CounterPropagateAddr(*MI, Addr);

465 }

466 }

467

468public:

469 static char ID;

470

472

// Pass entry point. Caches MaxFetchInst, TII and TRI from the subtarget, then
// walks the instructions and rewrites the pseudo control-flow opcodes
// (WHILELOOP, ENDLOOP, IF_PREDICATE_SET, ELSE, ENDIF, BREAK, CONTINUE, RETURN)
// while CfCount counts the control-flow instructions emitted so far. Addresses
// that are not yet known when an instruction is created are patched later via
// CounterPropagateAddr(). The only return visible here yields false.
// NOTE(review): many original lines are absent from this listing (loop
// headers, most BuildMI calls, and the declarations of ST, MBB, I, E, MI, MIb,
// DL, CFStack and Alu), so the fragments below are not complete statements -
// confirm details against the upstream file.
473 bool runOnMachineFunction(MachineFunction &MF) override {

475 MaxFetchInst = ST->getTexVTXClauseSize();

476 TII = ST->getInstrInfo();

477 TRI = ST->getRegisterInfo();

478

480

483 ++MB) {

// Number of control-flow instructions emitted so far; used as an address.
485 unsigned CfCount = 0;

// One entry per open loop: the CfCount at loop start plus the instructions
// whose operand 0 must be patched when the loop ends.
486 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;

// Pending jump/else instructions awaiting their target address.
487 std::vector<MachineInstr * > IfThenElseStack;

490 getHWInstrDesc(CF_CALL_FS));

491 CfCount++;

492 }

493 std::vector FetchClauses, AluClauses;

// LastAlu.back() is the CF_ALU seen immediately before the current
// instruction, or nullptr; one slot per open IF nesting level.
494 std::vector<MachineInstr *> LastAlu(1);

// CF_ALU instructions to be turned into CF_ALU_POP_AFTER at the end.
495 std::vector<MachineInstr *> ToPopAfter;

496

498 I != E;) {

// Fetch instructions are grouped into a fetch clause.
499 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {

501 FetchClauses.push_back(MakeFetchClause(MBB, I));

502 CfCount++;

503 LastAlu.back() = nullptr;

504 continue;

505 }

506

// Any instruction other than ENDIF clears the remembered ALU; a CF_ALU then
// becomes the new remembered one.
508 if (MI->getOpcode() != R600::ENDIF)

509 LastAlu.back() = nullptr;

510 if (MI->getOpcode() == R600::CF_ALU)

511 LastAlu.back() = &*MI;

512 I++;

513 bool RequiresWorkAround =

514 CFStack.requiresWorkAroundForInst(MI->getOpcode());

515 switch (MI->getOpcode()) {

516 case R600::CF_ALU_PUSH_BEFORE:

// Work-around path: the instruction is demoted to a plain CF_ALU and the
// push is accounted for as a CF_PUSH_EG branch.
517 if (RequiresWorkAround) {

519 << "Applying bug work-around for ALU_PUSH_BEFORE\n");

523 MI->setDesc(TII->get(R600::CF_ALU));

524 CfCount++;

525 CFStack.pushBranch(R600::CF_PUSH_EG);

526 } else

527 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);

528 [[fallthrough]];

529 case R600::CF_ALU:

531 AluClauses.push_back(MakeALUClause(MBB, I));

533 CfCount++;

534 break;

535 case R600::WHILELOOP: {

536 CFStack.pushLoop();

538 getHWInstrDesc(CF_WHILE_LOOP))

// Remember where the loop starts and that MIb needs patching at loop end.
540 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,

541 std::set<MachineInstr *>());

542 Pair.second.insert(MIb);

543 LoopStack.push_back(std::move(Pair));

544 MI->eraseFromParent();

545 CfCount++;

546 break;

547 }

548 case R600::ENDLOOP: {

549 CFStack.popLoop();

550 std::pair<unsigned, std::set<MachineInstr *>> Pair =

551 std::move(LoopStack.back());

552 LoopStack.pop_back();

// Patch every instruction recorded for this loop with the current count,
// then emit the end-of-loop instruction with Pair.first + 1 as immediate.
553 CounterPropagateAddr(Pair.second, CfCount);

554 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))

555 .addImm(Pair.first + 1);

556 MI->eraseFromParent();

557 CfCount++;

558 break;

559 }

560 case R600::IF_PREDICATE_SET: {

// Entering an IF opens a new LastAlu nesting level.
561 LastAlu.push_back(nullptr);

563 getHWInstrDesc(CF_JUMP))

566 IfThenElseStack.push_back(MIb);

568 MI->eraseFromParent();

569 CfCount++;

570 break;

571 }

572 case R600::ELSE: {

// Resolve the pending jump to the current count, then queue the else
// instruction for resolution at the matching ENDIF.
573 MachineInstr * JumpInst = IfThenElseStack.back();

574 IfThenElseStack.pop_back();

575 CounterPropagateAddr(*JumpInst, CfCount);

577 getHWInstrDesc(CF_ELSE))

581 IfThenElseStack.push_back(MIb);

582 MI->eraseFromParent();

583 CfCount++;

584 break;

585 }

586 case R600::ENDIF: {

587 CFStack.popBranch();

// If a CF_ALU directly precedes the ENDIF it is queued for conversion to
// CF_ALU_POP_AFTER; otherwise a separate CF_POP is emitted and counted.
588 if (LastAlu.back()) {

589 ToPopAfter.push_back(LastAlu.back());

590 } else {

592 getHWInstrDesc(CF_POP))

595 (void)MIb;

597 CfCount++;

598 }

599

600 MachineInstr *IfOrElseInst = IfThenElseStack.back();

601 IfThenElseStack.pop_back();

602 CounterPropagateAddr(*IfOrElseInst, CfCount);

604 LastAlu.pop_back();

605 MI->eraseFromParent();

606 break;

607 }

608 case R600::BREAK: {

609 CfCount ++;

611 getHWInstrDesc(CF_LOOP_BREAK))

// Patched with the loop-end address when ENDLOOP is reached.
613 LoopStack.back().second.insert(MIb);

614 MI->eraseFromParent();

615 break;

616 }

617 case R600::CONTINUE: {

619 getHWInstrDesc(CF_LOOP_CONTINUE))

// Patched with the loop-end address when ENDLOOP is reached.
621 LoopStack.back().second.insert(MIb);

622 MI->eraseFromParent();

623 CfCount++;

624 break;

625 }

626 case R600::RETURN: {

629 CfCount++;

// Bump an odd count by one more so that CfCount ends up even.
630 if (CfCount % 2) {

632 CfCount++;

633 }

634 MI->eraseFromParent();

// Emit all collected clauses: fetch clauses first, then ALU clauses.
635 for (ClauseFile &CF : FetchClauses)

636 EmitFetchClause(I, DL, CF, CfCount);

637 for (ClauseFile &CF : AluClauses)

638 EmitALUClause(I, DL, CF, CfCount);

639 break;

640 }

641 default:

// Export instructions also count as control-flow instructions.
642 if (TII->isExport(MI->getOpcode())) {

644 CfCount++;

645 }

646 break;

647 }

648 }

// A CF_ALU_POP_AFTER is built with the same nine immediates (operands 0-8)
// as Alu, after which Alu is erased.
// NOTE(review): presumably this runs once per entry in ToPopAfter; the loop
// header is not visible here.
651 TII->get(R600::CF_ALU_POP_AFTER))

652 .addImm(Alu->getOperand(0).getImm())

653 .addImm(Alu->getOperand(1).getImm())

654 .addImm(Alu->getOperand(2).getImm())

655 .addImm(Alu->getOperand(3).getImm())

656 .addImm(Alu->getOperand(4).getImm())

657 .addImm(Alu->getOperand(5).getImm())

658 .addImm(Alu->getOperand(6).getImm())

659 .addImm(Alu->getOperand(7).getImm())

660 .addImm(Alu->getOperand(8).getImm());

661 Alu->eraseFromParent();

662 }

664 }

665

666 return false;

667 }

668

669 StringRef getPassName() const override {

670 return "R600 Control Flow Finalizer Pass";

671 }

672};

673

674}

675

677 "R600 Control Flow Finalizer", false, false)

680

681char R600ControlFlowFinalizer::ID = 0;

682

684

686 return new R600ControlFlowFinalizer();

687}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

#define ENTRY(ASMNAME, ENUM)

Register const TargetRegisterInfo * TRI

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Provides R600 specific target descriptions.

AMDGPU R600 specific subclass of TargetSubtarget.

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

unsigned getWavefrontSize() const

FunctionPass class - This class is used to implement most global optimizations.

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

Describe properties that are true of each instruction in the target description file.

Instructions::iterator instr_iterator

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

BasicBlockListType::iterator iterator

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const

Representation of each machine instruction.

const MachineOperand * const_mop_iterator

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

LLVM_ABI void dump() const

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

bool isImm() const

isImm - Tests if this is a MO_Immediate operand.

Register getReg() const

getReg - Returns the register number.

bool hasCaymanISA() const

Generation getGeneration() const

Wrapper class representing virtual and physical registers.

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)

MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

constexpr T divideCeil(U Numerator, V Denominator)

Returns the integer ceil(Numerator / Denominator).

FunctionPass * createR600ControlFlowFinalizer()

Definition R600ControlFlowFinalizer.cpp:685

char & R600ControlFlowFinalizerID

Definition R600ControlFlowFinalizer.cpp:683

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

static unsigned getSubRegFromChannel(unsigned Channel)