LLVM: lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

23

24using namespace llvm;

25

26#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"

27

28namespace {

29

30class SIOptimizeExecMaskingPreRA {

31private:

38

41

44

45public:

51};

52

54public:

55 static char ID;

56

60 }

61

63

64 StringRef getPassName() const override {

65 return "SI optimize exec mask operations pre-RA";

66 }

67

68 void getAnalysisUsage(AnalysisUsage &AU) const override {

72 }

73};

74

75}

76

78 "SI optimize exec mask operations pre-RA", false, false)

81 "SI optimize exec mask operations pre-RA", false, false)

82

83char SIOptimizeExecMaskingPreRALegacy::ID = 0;

84

86

88 return new SIOptimizeExecMaskingPreRALegacy();

89}

90

91

92

98

99

105

106 if (Reg.isVirtual())

108

109 for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg())) {

111 return true;

112 }

113

114 return false;

115}

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {

135 unsigned Opc = MI.getOpcode();

136 return Opc == AMDGPU::S_CBRANCH_VCCZ ||

137 Opc == AMDGPU::S_CBRANCH_VCCNZ; });

139 return false;

140 // Only fold when the reaching def is an AND whose two sources are registers.

141 auto *And =

142 TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);

143 if (!And || And->getOpcode() != LMC.AndOpc || !And->getOperand(1).isReg() ||

144 !And->getOperand(2).isReg())

145 return false;

146

147 MachineOperand *AndCC = &And->getOperand(1);

149 unsigned CmpSubReg = AndCC->getSubReg();

150 if (CmpReg == Register(ExecReg)) {

151 AndCC = &And->getOperand(2);

152 CmpReg = AndCC->getReg();

154 } else if (And->getOperand(2).getReg() != Register(ExecReg)) {

155 return false;

156 }

157

158 auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);

159 if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||

160 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||

161 Cmp->getParent() != And->getParent())

162 return false;

163

164 MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);

165 MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);

169 return false;

170

173 return false;

174

175 auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);

176 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)

177 return false;

178

179 if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||

180 TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))

181 return false;

182

183 Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);

184 Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);

185 MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);

188 return false;

189

191

192

193

195 return false;

196

197

198

200 LiveInterval *SelLI = &LIS->getInterval(SelReg);

202 [](const VNInfo *VNI) {

203 return VNI->isPHIDef();

204 }))

205 return false;

206

207

208 LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'

209 << *And);

210

211 MachineInstr *Andn2 =

213 And->getOperand(0).getReg())

218 MachineOperand &Andn2SCC = Andn2->getOperand(3);

221

223 And->eraseFromParent();

224

226

227

228

231 LiveInterval &CCLI = LIS->getInterval(CCReg);

233 if (CCQ.valueIn()) {

236 }

237 } else

239

240

241

242 LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;

244 (CmpReg == Register(CondReg) &&

245 std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),

246 [&](const MachineInstr &MI) {

247 return MI.readsRegister(CondReg, TRI);

248 }))) {

250 if (CmpLI)

253 Cmp->eraseFromParent();

254

255

256

260 if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {

262

265 bool ShrinkSel = Sel->getOperand(0).readsReg();

266 Sel->eraseFromParent();

267 if (ShrinkSel) {

268

269

271 }

272 }

273 }

274

275 return true;

276}

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {

294 return false;

295

296

298 MachineInstr &SaveExecMI = *First;

300 return false;

301

303 return MI.getOpcode() == LMC.XorTermOpc;

304 });

306 return false;

307

308 MachineInstr &XorTermMI = *I;

310 return false;

311

314

315

316 MachineInstr *AndExecMI = nullptr;

317 I--;

318 while (I != First && !AndExecMI) {

319 if (I->getOpcode() == LMC.AndOpc && I->getOperand(0).getReg() == DstReg &&

320 I->getOperand(1).getReg() == Register(ExecReg))

321 AndExecMI = &*I;

322 I--;

323 }

324 if (!AndExecMI)

325 return false;

326

327

328

329

330

333 for (MCRegUnit Unit : TRI->regunits(ExecReg)) {

335 if (RegUnit.find(StartIdx) != std::prev(RegUnit.find(EndIdx)))

336 return false;

337 }

338

339

342

344

347

349

350 return true;

351}

352

353PreservedAnalyses

357 SIOptimizeExecMaskingPreRA(MF, &LIS).run(MF);

359}

360

361bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(

364 return false;

365

366 auto *LIS = &getAnalysis().getLIS();

367 return SIOptimizeExecMaskingPreRA(MF, LIS).run(MF);

368}

369

370bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {

373

376

377 for (MachineBasicBlock &MBB : MF) {

378

379 if (optimizeElseBranch(MBB)) {

380 RecalcRegs.insert(AMDGPU::SCC);

382 }

383

384 if (optimizeVcndVcmpPair(MBB)) {

385 RecalcRegs.insert(AMDGPU::VCC_LO);

386 RecalcRegs.insert(AMDGPU::VCC_HI);

387 RecalcRegs.insert(AMDGPU::SCC);

389 }

390

391

394 continue;

395

396

397

398

399

401 if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1)

402 continue;

403

404 SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});

405

406 while (!Blocks.empty()) {

407 auto *CurBB = Blocks.pop_back_val();

408 auto I = CurBB->rbegin(), E = CurBB->rend();

409 if (I != E) {

410 if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)

411 ++I;

412 else if (I->isBranch())

413 continue;

414 }

415

416 while (I != E) {

417 if (I->isDebugInstr()) {

418 I = std::next(I);

419 continue;

420 }

421

422 if (I->mayStore() || I->isBarrier() || I->isCall() ||

423 I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())

424 break;

425

427 << "Removing no effect instruction: " << *I << '\n');

428

429 for (auto &Op : I->operands()) {

430 if (Op.isReg())

431 RecalcRegs.insert(Op.getReg());

432 }

433

434 auto Next = std::next(I);

436 I->eraseFromParent();

438

440 }

441

442 if (I != E)

443 continue;

444

445

446 for (auto *Pred : CurBB->predecessors()) {

447 if (Pred->succ_size() == 1)

448 Blocks.push_back(Pred);

449 }

450 }

451 continue;

452 }

453

454

455

456

457

458

459

460

461 unsigned ScanThreshold = 10;

463 && ScanThreshold--; ++I) {

464

465 if (!(I->isFullCopy() && I->getOperand(1).getReg() == Register(ExecReg)))

466 continue;

467

468 Register SavedExec = I->getOperand(0).getReg();

469 if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {

470 MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec);

472 nullptr);

476 TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {

477 LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');

479 I->eraseFromParent();

480 MRI->replaceRegWith(SavedExec, ExecReg);

483 }

484 }

485 break;

486 }

487 }

488

490 for (auto Reg : RecalcRegs) {

493 if (MRI->reg_empty(Reg))

495 } else {

497 }

498 }

499 }

500

502}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Provides AMDGPU specific target descriptions.

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

AMD GCN specific subclass of TargetSubtarget.

const HexagonInstrInfo * TII

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static bool isDefBetween(Register Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)

static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx, SlotIndex SelIdx)

Definition SIOptimizeExecMaskingPreRA.cpp:93

SI Optimize VGPR LiveRange

static const LaneMaskConstants & get(const GCNSubtarget &ST)

const unsigned OrSaveExecOpc

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

Implements a dense probed hash-table based set.

FunctionPass class - This class is used to implement most global optimizations.

LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

void removeAllRegUnitsForPhysReg(MCRegister Reg)

Remove associated live ranges for the register units associated with Reg.

SlotIndex getInstructionIndex(const MachineInstr &Instr) const

Returns the base index of the given instruction.

void RemoveMachineInstrFromMaps(MachineInstr &MI)

LiveInterval & getInterval(Register Reg)

void removeInterval(Register Reg)

Interval removal.

LiveRange & getRegUnit(MCRegUnit Unit)

Return the live range for register unit Unit.

LLVM_ABI void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)

Remove value number and related live segments of LI and its subranges that start at position Pos.

LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)

After removing some uses of a register, shrink its live range to just the remaining uses.

LiveInterval & createAndComputeVirtRegInterval(Register Reg)

SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)

Result of a LiveRange query.

bool isDeadDef() const

Return true if this instruction has a dead def.

VNInfo * valueIn() const

Return the value that is live-in to the instruction.

VNInfo * valueOut() const

Return the value leaving the instruction, if any.

bool isKill() const

Return true if the live-in value is killed by this instruction.

This class represents the liveness of a register, stack slot, etc.

iterator_range< vni_iterator > vnis()

LiveQueryResult Query(SlotIndex Idx) const

Query Liveness at Idx.

LLVM_ABI iterator find(SlotIndex Pos)

find - Return an iterator pointing to the first segment that ends after Pos, or end().

Wrapper class representing physical registers. Should be passed by value.

static MCRegister from(unsigned Val)

Check the provided unsigned value is a valid MCRegister.

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< iterator > terminators()

reverse_iterator rbegin()

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

LLVM_ABI int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const

Returns the operand index that is a use of the specific register or -1 if it is not found.

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

unsigned getSubReg() const

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setIsDead(bool Val=true)

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

bool isImm() const

isImm - Tests if this is a MO_Immediate operand.

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Wrapper class representing virtual and physical registers.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition SIOptimizeExecMaskingPreRA.cpp:354

SlotIndex - An opaque wrapper around machine indexes.

SlotIndex getRegSlot(bool EC=false) const

Returns the register use/def slot in the current instruction for a normal or early-clobber def.

StringRef - Represent a constant reference to a string, i.e.

self_iterator getIterator()

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &)

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

char & SIOptimizeExecMaskingPreRAID

Definition SIOptimizeExecMaskingPreRA.cpp:85

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

unsigned getUndefRegState(bool B)

@ And

Bitwise or logical AND of integers.

FunctionAddr VTableAddr Next

DWARFExpression::Operation Op

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

FunctionPass * createSIOptimizeExecMaskingPreRAPass()

Definition SIOptimizeExecMaskingPreRA.cpp:87

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.