LLVM: lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

25

26#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"

27

28using namespace llvm;

29

30namespace {

31

33public:

34 static char ID;

35

36public:

38

40

41 StringRef getPassName() const override {

42 return "AMDGPU GlobalISel divergence lowering";

43 }

44

45 void getAnalysisUsage(AnalysisUsage &AU) const override {

51 }

52};

53

55public:

59

60private:

64

65public:

66 void markAsLaneMask(Register DstReg) const override;

67 void getCandidatesForLowering(

69 void collectIncomingValuesFromPhi(

78 void constrainAsLaneMask(Incoming &In) override;

79

80 bool lowerTemporalDivergence();

81 bool lowerTemporalDivergenceI1();

82};

83

84DivergenceLoweringHelper::DivergenceLoweringHelper(

88

89

// Force DstReg into the target's boolean ("lane mask") register class.
// NOTE(review): this listing is a doxygen extraction with original source
// lines 91 and 96 missing, so part of the body is not visible here.
90void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {

92

// If DstReg already has a register class, try to constrain it to the
// boolean RC rather than overwriting it outright.
93 if (MRI->getRegClassOrNull(DstReg)) {

94 if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
95 return;

// NOTE(review): original line 96 (the failure path when constraining
// fails) is missing from this extraction — confirm against upstream.
97 }

98

// No class assigned yet: set the boolean register class directly.
99 MRI->setRegClass(DstReg, ST->getBoolRC());

100}

101

102void DivergenceLoweringHelper::getCandidatesForLowering(

105

106

109 Register Dst = MI.getOperand(0).getReg();

112 }

113 }

114}

115

// Collect the (register, predecessor-block) incoming pairs of a PHI.
// NOTE(review): original lines 117 (parameter list) and 119 (start of the
// statement completed on line 120) are missing from this extraction.
116void DivergenceLoweringHelper::collectIncomingValuesFromPhi(

// PHI operands come in pairs after operand 0 (the def): value register at
// index i, incoming MachineBasicBlock at index i + 1.
118 for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {

120 MI->getOperand(i + 1).getMBB(), Register());

121 }

122}

123

124void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,

126 BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)

128}

129

130

131

136 B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));

137 B.buildCopy(LaneMask, Reg);

138 return LaneMask;

139}

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

// Merge two lane masks at insertion point I:
//   Dst = (Prev & ~Exec) | (Cur & Exec)
// i.e. active lanes take the current value, inactive lanes keep the
// previous one (established by the AndN2/And/Or sequence below).
// NOTE(review): doxygen extraction dropped original lines 165-166
// (parameter list) and 172-173 (presumably where PrevMaskedReg and
// CurMaskedReg are created) — confirm against upstream.
164void DivergenceLoweringHelper::buildMergeLaneMasks(

167

168

169

// Normalize both inputs to lane-mask registers first.
170 Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);

171 Register CurRegCopy = buildRegCopyToLaneMask(CurReg);

174

175 B.setInsertPt(MBB, I);

// PrevMasked = Prev & ~Exec ; CurMasked = Exec & Cur ; Dst = Prev | Cur.
176 B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});

177 B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});

178 B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});

179}

180

181

182

183

184void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {

185 B.setInsertPt(*In.Block, In.Block->getFirstTerminator());

186

187 auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);

188 MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());

189 In.Reg = Copy.getReg(0);

190}

191

195 if (Op.isReg() && Op.getReg() == Reg)

196 Op.setReg(NewReg);

197 }

198}

199

// Rewrite uses of temporally-divergent values: insert a COPY to a VGPR-side
// register right after the defining instruction (skipping PHIs/labels) and
// make the divergent use read the copy instead. Copies are cached per Reg
// in TDCache so each source register is copied at most once.
// NOTE(review): doxygen extraction dropped original lines 201-206, 209,
// 215-216 and 221 (loop header, cache lookup, and the tail of the COPY
// build on line 220) — body is incomplete in this listing.
200bool DivergenceLoweringHelper::lowerTemporalDivergence() {

203

207 continue;

208

// Reuse a previously built copy for this register if one exists.
210 if (CachedTDCopy) {

211 replaceUsesOfRegInInstWith(Reg, UseInst, CachedTDCopy);

212 continue;

213 }

214

// Place the copy immediately after the defining instruction.
217 B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Inst->getIterator())));

218

219 Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));

220 B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})

222

223 replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);

224 TDCache[Reg] = VgprReg;

225 }

// Always reports no change here; presumably MF modification is tracked
// elsewhere — TODO confirm against upstream.
226 return false;

227}

228

// i1 variant of temporal-divergence lowering: for each register used
// outside its defining cycle, build a merged lane mask that is live out of
// the largest relevant cycle (LRC), then rewrite the outside uses to read
// the merged mask.
// NOTE(review): doxygen extraction dropped many original lines (230, 232,
// 236-238, 250, 252, 255, 257-258, 261, 265, 271, 276-277), so loop
// headers and several statements are not visible in this listing.
229bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {

231 initializeLaneMaskRegisterAttributes(BoolS1);

233

234

235

239 continue;

240

// First pass: record, per register, the outermost cycle that must carry
// the merged mask. try_emplace gives us the cache slot and whether the
// register was seen before.
241 auto [LRCCacheIter, RegNotCached] = LRCCache.try_emplace(Reg);

242 auto &CycleMergedMask = LRCCacheIter->getSecond();

243 const MachineCycle *&CachedLRC = CycleMergedMask.first;

// Widen the cached cycle whenever the new LRC encloses it.
244 if (RegNotCached || LRC->contains(CachedLRC)) {

245 CachedLRC = LRC;

246 }

247 }

248

// Second pass: build one merged lane mask per cached (Reg, Cycle) entry.
249 for (auto &LRCCacheEntry : LRCCache) {

251 auto &CycleMergedMask = LRCCacheEntry.getSecond();

253

254 Register MergedMask = MRI->createVirtualRegister(BoolS1);

256

259

// Seed cycle entries reached from inside the cycle with IMPLICIT_DEF so
// the merged mask has a defined value on back edges.
260 for (auto Entry : Cycle->getEntries()) {

262 if (Cycle->contains(Pred)) {

263 B.setInsertPt(*Pred, Pred->getFirstTerminator());

264 auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});

266 }

267 }

268 }

269

270 buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,

272

273 CycleMergedMask.second = MergedMask;

274 }

275

// Third pass: redirect each outside use to the merged mask for its Reg.
278 continue;

279

280 replaceUsesOfRegInInstWith(Reg, UseInst, LRCCache.lookup(Reg).second);

281 }

282

283 return false;

284}

285

286}

287

289 "AMDGPU GlobalISel divergence lowering", false, false)

294 "AMDGPU GlobalISel divergence lowering", false, false)

295

296char AMDGPUGlobalISelDivergenceLowering::ID = 0;

297

299 AMDGPUGlobalISelDivergenceLowering::ID;

300

302 return new AMDGPUGlobalISelDivergenceLowering();

303}

304

305bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(

308 getAnalysis().getDomTree();

310 getAnalysis().getPostDomTree();

312 getAnalysis().getUniformityInfo();

313

314 DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);

315

317

318

319

320

321

322

323

324 Changed |= Helper.lowerTemporalDivergence();

325

326

327 Changed |= Helper.lowerTemporalDivergenceI1();

328

329

330

331 Changed |= Helper.lowerPhis();

333}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file declares the MachineIRBuilder class.

Machine IR instance of the generic uniformity analysis.

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...

bool isS32S64LaneMask(Register Reg) const

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

FunctionPass class - This class is used to implement most global optimizations.

bool isDivergent(ConstValueRefT V) const

Whether V is divergent at its definition.

iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const

static constexpr LLT scalar(unsigned SizeInBits)

Get a low-level scalar or aggregate "bag of bits".

iterator_range< pred_iterator > predecessors()

MachineInstrBundleIterator< MachineInstr > iterator

Analysis pass which computes a MachineDominatorTree.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

Helper class to build MachineInstr.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

MachineOperand class - Representation of each machine instruction operand.

MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...

MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...

Legacy analysis pass which computes a MachineUniformityInfo.

Wrapper class representing virtual and physical registers.

Helper class for SSA formation on a set of values defined in multiple blocks.

void Initialize(Type *Ty, StringRef Name)

Reset this object to get ready for a new set of SSA updates with type 'Ty'.

Value * GetValueInMiddleOfBlock(BasicBlock *BB)

Construct SSA form, materializing a value that is live in the middle of the specified block.

void AddAvailableValue(BasicBlock *BB, Value *V)

Indicate that a rewritten value is available in the specified block with the specified value.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void push_back(const T &Elt)

StringRef - Represent a constant reference to a string, i.e.

self_iterator getIterator()

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

This is an optimization pass for GlobalISel generic memory operations.

GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

char & AMDGPUGlobalISelDivergenceLoweringID

Definition AMDGPUGlobalISelDivergenceLowering.cpp:298

Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)

DWARFExpression::Operation Op

FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()

Definition AMDGPUGlobalISelDivergenceLowering.cpp:301

MachineCycleInfo::CycleT MachineCycle

Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...

All attributes(register class or bank and low-level type) a virtual register can have.