LLVM: lib/Target/AMDGPU/SILowerI1Copies.cpp Source File

//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
// to lane masks (32 / 64-bit scalar registers). The pass assumes machine SSA
// form and a wave-level control flow graph.
//
// Before this pass, values that are semantically i1 and are defined and used
// within the same basic block are already represented as lane masks in scalar
// registers. However, values that cross basic blocks are always transferred
// between basic blocks in vreg_1 virtual registers and are lowered by this
// pass.
//
// The only instructions that use or define vreg_1 virtual registers are COPY,
// PHI, and IMPLICIT_DEF.
//
//===----------------------------------------------------------------------===//

#include "SILowerI1Copies.h"
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "si-i1-copies"

using namespace llvm;

namespace {

class Vreg1LoweringHelper : public PhiLoweringHelper {
public:
  Vreg1LoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
                      MachinePostDominatorTree *PDT);

private:
  DenseSet<Register> ConstrainRegs;

public:
  // Overrides of the PhiLoweringHelper virtual interface (markAsLaneMask,
  // getCandidatesForLowering, collectIncomingValuesFromPhi, replaceDstReg,
  // buildMergeLaneMasks, constrainAsLaneMask); definitions appear below.

  bool lowerCopiesFromI1();
  bool lowerCopiesToI1();
  bool cleanConstrainRegs(bool Changed);
  bool isVreg1(Register Reg) const {
    return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
  }
};

Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
                                         MachineDominatorTree *DT,
                                         MachinePostDominatorTree *PDT)
    : PhiLoweringHelper(MF, DT, PDT) {}

bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
  assert(Changed || ConstrainRegs.empty());
  for (Register Reg : ConstrainRegs)
    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
  ConstrainRegs.clear();

  return Changed;
}

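/// Helper class that analyzes the control flow induced by the incoming blocks
/// of a lane-mask PHI. A reachable block is a "source" if none of its
/// predecessors is reachable from the incoming blocks without passing through
/// the PHI's defining block; predecessors that are not reachable are collected
/// so that an undef lane mask can be made available in them.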

class PhiIncomingAnalysis {
  MachinePostDominatorTree &PDT;
  const SIInstrInfo *TII;

  DenseMap<MachineBasicBlock *, bool> ReachableMap;
  SmallVector<MachineBasicBlock *, 4> ReachableOrdered;
  SmallVector<MachineBasicBlock *, 4> Stack;
  SmallVector<MachineBasicBlock *, 4> Predecessors;

public:
  PhiIncomingAnalysis(MachinePostDominatorTree &PDT, const SIInstrInfo *TII)
      : PDT(PDT), TII(TII) {}

  bool isSource(MachineBasicBlock &MBB) const {
    return ReachableMap.find(&MBB)->second;
  }

  ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }

  void analyze(MachineBasicBlock &DefBlock, ArrayRef<Incoming> Incomings) {
    ReachableMap.clear();
    ReachableOrdered.clear();
    Predecessors.clear();

    // Insert the def block first, so that it acts as an end point for the
    // traversal.
    ReachableMap.try_emplace(&DefBlock, false);
    ReachableOrdered.push_back(&DefBlock);

    for (auto Incoming : Incomings) {
      MachineBasicBlock *MBB = Incoming.Block;
      if (MBB == &DefBlock) {
        ReachableMap[&DefBlock] = true; // self-loop on DefBlock
        continue;
      }

      ReachableMap.try_emplace(MBB, false);
      ReachableOrdered.push_back(MBB);
    }

    while (!Stack.empty()) {
      MachineBasicBlock *MBB = Stack.pop_back_val();
      if (!ReachableMap.try_emplace(MBB, false).second)
        continue;
      ReachableOrdered.push_back(MBB);

      append_range(Stack, MBB->successors());
    }

    // A reachable block without any reachable predecessor is a source.
    for (MachineBasicBlock *MBB : ReachableOrdered) {
      bool HaveReachablePred = false;
      for (MachineBasicBlock *Pred : MBB->predecessors()) {
        if (ReachableMap.count(Pred)) {
          HaveReachablePred = true;
        } else {
          Stack.push_back(Pred);
        }
      }
      if (!HaveReachablePred)
        ReachableMap[MBB] = true;
      if (HaveReachablePred) {
        for (MachineBasicBlock *UnreachablePred : Stack) {
          if (!is_contained(Predecessors, UnreachablePred))
            Predecessors.push_back(UnreachablePred);
        }
      }
      Stack.clear();
    }
  }
};

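/// Helper class that detects loops which require lane-mask PHIs and copies to
/// be lowered with wave-level semantics. Starting from the defining block it
/// walks outward one level per post-dominator of the def block; findLoop
/// reports the smallest level at which a back edge to the def block exists,
/// and CommonDominators records a common dominator per level so that undef
/// lane masks can be placed outside the loop (addLoopEntries).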

class LoopFinder {
  MachineDominatorTree &DT;
  MachinePostDominatorTree &PDT;

  // All visited blocks, tagged with the level at which they were reached.
  DenseMap<MachineBasicBlock *, unsigned> Visited;

  // Nearest common dominator of all blocks visited so far, one entry per level.
  SmallVector<MachineBasicBlock *, 4> CommonDominators;

  // Post-dominator of all blocks visited at the current level.
  MachineBasicBlock *VisitedPostDom = nullptr;

  // Level at which a back edge to the def block was found.
  unsigned FoundLoopLevel = ~0u;

  MachineBasicBlock *DefBlock = nullptr;
  SmallVector<MachineBasicBlock *, 4> Stack;
  SmallVector<MachineBasicBlock *, 4> NextLevel;

public:
  LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
      : DT(DT), PDT(PDT) {}

  void initialize(MachineBasicBlock &MBB) {
    Visited.clear();
    CommonDominators.clear();
    Stack.clear();
    NextLevel.clear();
    VisitedPostDom = nullptr;
    FoundLoopLevel = ~0u;

    DefBlock = &MBB;
  }

  /// Check whether a backward edge can be reached without going through the
  /// given \p PostDom of the def block. Return the level of \p PostDom if so,
  /// 0 otherwise.
  unsigned findLoop(MachineBasicBlock *PostDom) {
    MachineDomTreeNode *PDNode = PDT.getNode(DefBlock);

    if (!VisitedPostDom)
      advanceLevel();

    unsigned Level = 0;
    while (PDNode->getBlock() != PostDom) {
      if (PDNode->getBlock() == VisitedPostDom)
        advanceLevel();
      PDNode = PDNode->getIDom();
      Level++;
      if (FoundLoopLevel == Level)
        return Level;
    }

    return 0;
  }

  /// Add undef lane-mask values in blocks that dominate the loop (and the
  /// optionally given incoming blocks), so that the SSA updater sees a value
  /// on every path into the loop.
  void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
                      MachineRegisterInfo &MRI,
                      MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
                      ArrayRef<Incoming> Incomings = {}) {
    assert(LoopLevel < CommonDominators.size());

    MachineBasicBlock *Dom = CommonDominators[LoopLevel];
    for (auto &Incoming : Incomings)
      Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);

    if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
      SSAUpdater.AddAvailableValue(
          Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
    } else {
      // The dominator is part of the loop or the given blocks, so add the
      // undef value to unreachable predecessors instead.
      for (MachineBasicBlock *Pred : Dom->predecessors()) {
        if (!inLoopLevel(*Pred, LoopLevel, Incomings))
          SSAUpdater.AddAvailableValue(
              Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
      }
    }
  }

private:
  bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
                   ArrayRef<Incoming> Incomings) const {
    auto DomIt = Visited.find(&MBB);
    if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
      return true;

    for (auto &Incoming : Incomings)
      if (Incoming.Block == &MBB)
        return true;

    return false;
  }

  void advanceLevel() {
    MachineBasicBlock *VisitedDom;

    if (!VisitedPostDom) {
      VisitedPostDom = DefBlock;
      VisitedDom = DefBlock;
      Stack.push_back(DefBlock);
    } else {
      VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
      VisitedDom = CommonDominators.back();

      for (unsigned i = 0; i < NextLevel.size();) {
        if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
          Stack.push_back(NextLevel[i]);

          NextLevel[i] = NextLevel.back();
          NextLevel.pop_back();
        } else {
          i++;
        }
      }
    }

    unsigned Level = CommonDominators.size();
    while (!Stack.empty()) {
      MachineBasicBlock *MBB = Stack.pop_back_val();

      Visited[MBB] = Level;
      VisitedDom = DT.findNearestCommonDominator(VisitedDom, MBB);

      for (MachineBasicBlock *Succ : MBB->successors()) {
        if (Succ == DefBlock) {
          if (MBB == VisitedPostDom)
            FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
          else
            FoundLoopLevel = std::min(FoundLoopLevel, Level);
          continue;
        }

        if (Visited.try_emplace(Succ, ~0u).second) {
          if (MBB == VisitedPostDom)
            NextLevel.push_back(Succ);
          else
            Stack.push_back(Succ);
        }
      }
    }

    CommonDominators.push_back(VisitedDom);
  }
};

} // anonymous namespace

Register llvm::createLaneMaskReg(
    MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
  return MRI->createVirtualRegister(LaneMaskRegAttrs);
}

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
                    MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
  MachineFunction &MF = *MBB->getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  Register UndefReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
          UndefReg);
  return UndefReg;
}

#ifndef NDEBUG
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
                                const MachineRegisterInfo &MRI, Register Reg) {
  unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
  return Size == 1 || Size == 32;
}
#endif
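// Lower COPYs from a vreg_1 into an ordinary 32-bit register: the lane mask
// source becomes the condition of a V_CNDMASK_B32 that writes 0 or -1 per
// lane, and the source register is constrained to a class that excludes EXEC.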

405

406bool Vreg1LoweringHelper::lowerCopiesFromI1() {

407 bool Changed = false;

409

412 if (MI.getOpcode() != AMDGPU::COPY)

413 continue;

414

415 Register DstReg = MI.getOperand(0).getReg();

416 Register SrcReg = MI.getOperand(1).getReg();

417 if (!isVreg1(SrcReg))

418 continue;

419

420 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))

421 continue;

422

423 Changed = true;

424

425

428

430 assert(MI.getOperand(0).getSubReg());

431

432 ConstrainRegs.insert(SrcReg);

433 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

440 }

441

443 MI->eraseFromParent();

444 DeadCopies.clear();

445 }

446 return Changed;

447}

PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
                                     MachineDominatorTree *DT,
                                     MachinePostDominatorTree *PDT)
    : MF(MF), DT(DT), PDT(PDT) {
  MRI = &MF->getRegInfo();

  ST = &MF->getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  IsWave32 = ST->isWave32();

  if (IsWave32) {
    ExecReg = AMDGPU::EXEC_LO;
    MovOp = AMDGPU::S_MOV_B32;
    AndOp = AMDGPU::S_AND_B32;
    OrOp = AMDGPU::S_OR_B32;
    XorOp = AMDGPU::S_XOR_B32;
    AndN2Op = AMDGPU::S_ANDN2_B32;
    OrN2Op = AMDGPU::S_ORN2_B32;
  } else {
    ExecReg = AMDGPU::EXEC;
    MovOp = AMDGPU::S_MOV_B64;
    AndOp = AMDGPU::S_AND_B64;
    OrOp = AMDGPU::S_OR_B64;
    XorOp = AMDGPU::S_XOR_B64;
    AndN2Op = AMDGPU::S_ANDN2_B64;
    OrN2Op = AMDGPU::S_ORN2_B64;
  }
}
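// Lower the candidate lane-mask PHIs reported by getCandidatesForLowering.
// Incoming lane masks are merged into the destination using the wave-level
// opcodes selected above, with MachineSSAUpdater placing the intermediate
// values; PHIs observed from outside a loop additionally receive an undef
// lane mask at the loop entry via LoopFinder::addLoopEntries.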

bool PhiLoweringHelper::lowerPhis() {
  MachineSSAUpdater SSAUpdater(*MF);
  LoopFinder LF(*DT, *PDT);
  PhiIncomingAnalysis PIA(*PDT, TII);
  SmallVector<MachineInstr *, 4> Vreg1Phis;
  SmallVector<Incoming, 4> Incomings;

  getCandidatesForLowering(Vreg1Phis);
  if (Vreg1Phis.empty())
    return false;

  DT->updateDFSNumbers();
  MachineBasicBlock *PrevMBB = nullptr;
  for (MachineInstr *MI : Vreg1Phis) {
    MachineBasicBlock &MBB = *MI->getParent();
    if (&MBB != PrevMBB) {
      LF.initialize(MBB);
      PrevMBB = &MBB;
    }

    Register DstReg = MI->getOperand(0).getReg();
    markAsLaneMask(DstReg);
    initializeLaneMaskRegisterAttributes(DstReg);

    collectIncomingValuesFromPhi(MI, Incomings);

    // Sort the incomings such that incoming values that dominate other
    // incoming values are sorted earlier. This allows some amount of
    // on-the-fly constant folding during the merge.
    sort(Incomings, [this](Incoming LHS, Incoming RHS) {
      return DT->getNode(LHS.Block)->getDFSNumIn() <
             DT->getNode(RHS.Block)->getDFSNumIn();
    });

#ifndef NDEBUG
    PhiRegisters.insert(DstReg);
#endif

    // Phis in a loop that are observed outside the loop receive a simple but
    // conservatively correct treatment.
    std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
    for (MachineInstr &Use : MRI->use_instructions(DstReg))
      DomBlocks.push_back(Use.getParent());

    MachineBasicBlock *PostDomBound =
        PDT->findNearestCommonDominator(DomBlocks);

    unsigned FoundLoopLevel = LF.findLoop(PostDomBound);

    SSAUpdater.Initialize(DstReg);

    if (FoundLoopLevel) {
      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
                        Incomings);

      for (auto &Incoming : Incomings) {
        Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
        SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
      }

      for (auto &Incoming : Incomings) {
        MachineBasicBlock &IMBB = *Incoming.Block;
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
      }
    } else {
      // The phi is not observed from outside a loop. Use a more accurate
      // lowering.
      PIA.analyze(MBB, Incomings);

      for (MachineBasicBlock *MBB : PIA.predecessors())
        SSAUpdater.AddAvailableValue(
            MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));

      for (auto &Incoming : Incomings) {
        MachineBasicBlock &IMBB = *Incoming.Block;
        if (PIA.isSource(IMBB)) {
          constrainAsLaneMask(Incoming);
          SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
        } else {
          Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
          SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
        }
      }

      for (auto &Incoming : Incomings) {
        if (!Incoming.UpdatedReg.isValid())
          continue;

        MachineBasicBlock &IMBB = *Incoming.Block;
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
      }
    }

    Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
    if (NewReg != DstReg) {
      replaceDstReg(NewReg, DstReg, &MBB);
      MI->eraseFromParent();
    }

    Incomings.clear();
  }
  return true;
}
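// Lower COPY and IMPLICIT_DEF instructions that define a vreg_1: the
// destination is turned into a lane-mask register, a plain 32-bit source is
// first compared against zero to form a lane mask, and defs inside loops are
// merged with the value of the previous iteration so that lanes which have
// already left the loop keep their value.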

bool Vreg1LoweringHelper::lowerCopiesToI1() {
  bool Changed = false;
  MachineSSAUpdater SSAUpdater(*MF);
  LoopFinder LF(*DT, *PDT);
  SmallVector<MachineInstr *, 4> DeadCopies;

  for (MachineBasicBlock &MBB : *MF) {
    LF.initialize(MBB);

    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
          MI.getOpcode() != AMDGPU::COPY)
        continue;

      Register DstReg = MI.getOperand(0).getReg();
      if (!isVreg1(DstReg))
        continue;

      Changed = true;

      if (MRI->use_empty(DstReg)) {
        DeadCopies.push_back(&MI);
        continue;
      }

      DebugLoc DL = MI.getDebugLoc();

      markAsLaneMask(DstReg);
      initializeLaneMaskRegisterAttributes(DstReg);

      if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
        continue;

      Register SrcReg = MI.getOperand(1).getReg();
      assert(!MI.getOperand(1).getSubReg());

      if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
        assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
        Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
            .addReg(SrcReg)
            .addImm(0);
        MI.getOperand(1).setReg(TmpReg);
        SrcReg = TmpReg;
      } else {
        // The source is already a lane mask; keep it live past this use.
        MI.getOperand(1).setIsKill(false);
      }

      // Defs in a loop need to be merged with the value of the previous
      // iteration, so that lanes which left the loop earlier keep their value.
      std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
      for (MachineInstr &Use : MRI->use_instructions(DstReg))
        DomBlocks.push_back(Use.getParent());

      MachineBasicBlock *PostDomBound =
          PDT->findNearestCommonDominator(DomBlocks);
      unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
      if (FoundLoopLevel) {
        SSAUpdater.Initialize(DstReg);
        SSAUpdater.AddAvailableValue(&MBB, DstReg);
        LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);

        buildMergeLaneMasks(MBB, MI, DL, DstReg,
                            SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
        DeadCopies.push_back(&MI);
      }
    }

    for (MachineInstr *MI : DeadCopies)
      MI->eraseFromParent();
    DeadCopies.clear();
  }
  return Changed;
}
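// Check whether a lane-mask register is a compile-time constant (all-zeros or
// all-ones), looking through COPYs and treating IMPLICIT_DEF as an arbitrary
// constant. buildMergeLaneMasks uses this to fold away redundant merge code.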

bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
  const MachineInstr *MI;
  for (;;) {
    MI = MRI->getUniqueVRegDef(Reg);
    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
      return true;

    if (MI->getOpcode() != AMDGPU::COPY)
      break;

    Reg = MI->getOperand(1).getReg();
    if (!Reg.isVirtual())
      return false;
    if (!isLaneMaskReg(Reg))
      return false;
  }

  if (MI->getOpcode() != MovOp)
    return false;

  if (!MI->getOperand(1).isImm())
    return false;

  int64_t Imm = MI->getOperand(1).getImm();
  if (Imm == 0) {
    Val = false;
    return true;
  }
  if (Imm == -1) {
    Val = true;
    return true;
  }

  return false;
}

705

707 Def = false;

708 Use = false;

709

711 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {

712 if (MO.isUse())

713 Use = true;

714 else

715 Def = true;

716 }

717 }

718}
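/// Return a point at the end of the given MBB to insert SALU instructions for
/// lane mask calculation. Terminators and SCC are taken into account: if a
/// terminator reads SCC, the insertion point is moved back to just after the
/// instruction that defines it, so that the inserted code cannot clobber the
/// live SCC value.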

MachineBasicBlock::iterator
PhiLoweringHelper::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator InsertionPt = MBB.getFirstTerminator();

  bool TerminatorsUseSCC = false;
  for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
    bool DefsSCC;
    instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
    if (TerminatorsUseSCC || DefsSCC)
      break;
  }

  if (!TerminatorsUseSCC)
    return InsertionPt;

  while (InsertionPt != MBB.begin()) {
    InsertionPt--;

    bool DefSCC, UseSCC;
    instrDefsUsesSCC(*InsertionPt, DefSCC, UseSCC);
    if (DefSCC)
      return InsertionPt;
  }

  llvm_unreachable("SCC used by terminator but no def in block");
}

void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
  MRI->setRegClass(DstReg, ST->getBoolRC());
}

void Vreg1LoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB.phis()) {
      if (isVreg1(MI.getOperand(0).getReg()))
        Vreg1Phis.push_back(&MI);
    }
  }
}

void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
    assert(i + 1 < MI->getNumOperands());
    Register IncomingReg = MI->getOperand(i).getReg();
    MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
    MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);

    if (IncomingDef->getOpcode() == AMDGPU::COPY) {
      IncomingReg = IncomingDef->getOperand(1).getReg();
      assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
      assert(!IncomingDef->getOperand(1).getSubReg());
    } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
      continue;
    } else {
      assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
    }

    Incomings.emplace_back(IncomingReg, IncomingMBB, Register());
  }
}

void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                        MachineBasicBlock *MBB) {
  MRI->replaceRegWith(NewReg, OldReg);
}
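// Merge the previous value of a lane mask (PrevReg) with a newly produced one
// (CurReg) into DstReg: lanes that are active in EXEC take the new value,
// inactive lanes keep the old one. Constant inputs detected by
// isConstantLaneMask let the ANDN2 / AND / OR sequence collapse into a plain
// COPY or a single logical operation.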

void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator I,
                                              const DebugLoc &DL,
                                              Register DstReg, Register PrevReg,
                                              Register CurReg) {
  bool PrevVal = false;
  bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
  bool CurVal = false;
  bool CurConstant = isConstantLaneMask(CurReg, CurVal);

  if (PrevConstant && CurConstant) {
    if (PrevVal == CurVal) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
    } else if (CurVal) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
    } else {
      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
          .addReg(ExecReg)
          .addImm(-1);
    }
    return;
  }

  Register PrevMaskedReg;
  Register CurMaskedReg;
  if (!PrevConstant) {
    if (CurConstant && CurVal) {
      PrevMaskedReg = PrevReg;
    } else {
      PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
      BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
          .addReg(PrevReg)
          .addReg(ExecReg);
    }
  }
  if (!CurConstant) {
    if (PrevConstant && PrevVal) {
      CurMaskedReg = CurReg;
    } else {
      CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
      BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
          .addReg(CurReg)
          .addReg(ExecReg);
    }
  }

  if (PrevConstant && !PrevVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(CurMaskedReg);
  } else if (CurConstant && !CurVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(PrevMaskedReg);
  } else if (PrevConstant && PrevVal) {
    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
        .addReg(CurMaskedReg)
        .addReg(ExecReg);
  } else {
    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
        .addReg(PrevMaskedReg)
        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
  }
}

void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}

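// Entry point shared by the legacy and new pass manager wrappers below: run
// the three lowering phases in a fixed order (copies from i1, then PHIs, then
// copies to i1) and finally constrain the registers recorded along the way.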

static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT,
                           MachinePostDominatorTree &MPDT) {
  // Only needed on the SelectionDAG path; GlobalISel-selected functions do not
  // contain vreg_1 registers.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::Selected))
    return false;

  Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
  bool Changed = false;
  Changed |= Helper.lowerCopiesFromI1();
  Changed |= Helper.lowerPhis();
  Changed |= Helper.lowerCopiesToI1();
  return Helper.cleanConstrainRegs(Changed);
}

PreservedAnalyses
SILowerI1CopiesPass::run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM) {
  MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  MachinePostDominatorTree &MPDT =
      MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);
  bool Changed = runFixI1Copies(MF, MDT, MPDT);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class SILowerI1CopiesLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerI1CopiesLegacy() : MachineFunctionPass(ID) {
    initializeSILowerI1CopiesLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Lower i1 Copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

bool SILowerI1CopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
  MachineDominatorTree &MDT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachinePostDominatorTree &MPDT =
      getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  return runFixI1Copies(MF, MDT, MPDT);
}

INITIALIZE_PASS_BEGIN(SILowerI1CopiesLegacy, DEBUG_TYPE, "SI Lower i1 Copies",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SILowerI1CopiesLegacy, DEBUG_TYPE, "SI Lower i1 Copies",
                    false, false)

char SILowerI1CopiesLegacy::ID = 0;

char &llvm::SILowerI1CopiesLegacyID = SILowerI1CopiesLegacy::ID;

FunctionPass *llvm::createSILowerI1CopiesLegacyPass() {
  return new SILowerI1CopiesLegacy();
}
