LLVM: lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

49

50using namespace llvm;

51

52#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"

53

54namespace {

55

// Worker class of this file: tracks a packed "mode" value and, through
// setMode(), inserts or patches S_SET_VGPR_MSB instructions so the mode matches
// what each instruction's operands need.
// NOTE(review): several declarations are not visible in this listing (the head
// of the ModeType alias, the public run() entry point, and the TII / TRI / MBB /
// MostRecentModeSet / Clause members used by the out-of-line definitions).
56class AMDGPULowerVGPREncoding {

// Number of operand slots examined per instruction (loop bound in computeMode).
57 static constexpr unsigned OpNum = 4;

// Width in bits of one mode field.
58 static constexpr unsigned BitsPerField = 2;

// Number of fields packed into one mode value.
59 static constexpr unsigned NumFields = 4;

// All-ones value for a single field.
60 static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;

// Total width of one packed mode value: NumFields * BitsPerField bits.
61 static constexpr unsigned ModeWidth = NumFields * BitsPerField;

// All-ones value covering one whole packed mode.
62 static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;

// Tail of a type alias whose first line is not visible; presumably it defines
// the ModeType base used below, backed by a bitset of ModeWidth bits.
64 std::bitset<BitsPerField * NumFields>>;

65

// Mode value type: ModeType plus an integer conversion and an all-ones factory.
66 class ModeTy : public ModeType {

67 public:

68

// Default mode: constructed from 0.
69 ModeTy() : ModeType(0) {}

70

// Exposes the raw bits as an integer so a mode can be used as an immediate.
71 operator int64_t() const { return raw_bits().to_ulong(); }

72

// Returns a value with every raw bit set (default value with all bits flipped).
73 static ModeTy fullMask() {

74 ModeTy M;

75 M.raw_bits().flip();

76 return M;

77 }

78 };

79

80public:

82

83private:

86

87

89

90

92

93

// Mode bits that setMode() currently considers to be in effect.
94 ModeTy CurrentMode;

95

96

97

// Union of the masks requested since CurrentMask was last overwritten; setMode()
// ORs each request into it and replaces it when it builds a new instruction.
98 ModeTy CurrentMask;

99

100

// Length of the current clause; decoded from the S_CLAUSE immediate in run() and
// incremented by handleClause().
101 unsigned ClauseLen;

102

103

// Instructions of the current clause still to be processed (0 = not in a clause).
104 unsigned ClauseRemaining;

105

106

// Bits [8, 12) of the S_CLAUSE immediate as decoded in run().
107 unsigned ClauseBreaks;

108

109

111

112

// Requests NewMode for the bits selected by Mask (remaining parameter is on a
// line that is not visible). Returns true if an instruction was built or patched.
113 bool setMode(ModeTy NewMode, ModeTy Mask,

115

116

// Body of an inline helper whose signature line is not visible (presumably
// resetMode, which run() calls): requests the default mode with a full mask.
118 setMode(ModeTy(), ModeTy::fullMask(), I);

119 }

120

121

// Returns the bits above the low 8 of the operand's hardware register index, or
// nullopt; the optional's template argument is not visible in this listing.
122 std::optional getMSBs(const MachineOperand &MO) const;

123

124

126

127

128

129

130

// Fills NewMode / Mask for MI from the operands named in Ops, consulting the
// optional second table Ops2 when an Ops entry yields no value.
131 void computeMode(ModeTy &NewMode, ModeTy &Mask, MachineInstr &MI,

132 const AMDGPU::OpName Ops[OpNum],

133 const AMDGPU::OpName *Ops2 = nullptr);

134

135

136

137

140};

141

// Makes the tracked mode agree with NewMode on the bits selected by Mask.
// Returns false when nothing had to change, true when an existing
// S_SET_VGPR_MSB was patched or a new one was built.
// NOTE(review): the second signature line (the iterator parameter I) is not
// visible in this listing.
142bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,

// Precondition: NewMode has no bits set outside Mask.
144 assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());

145

// Bits in which the requested mode differs from the tracked one.
146 auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();

147

// Case 1: no difference inside Mask, so only remember that these bits are in use.
148 if ((Delta & Mask.raw_bits()).none()) {

149 CurrentMask |= Mask;

150 return false;

151 }

152

// Case 2: a previous mode-set exists and none of the differing bits is covered
// by CurrentMask, so the request is folded into that instruction.
153 if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {

154 CurrentMode |= NewMode;

155 CurrentMask |= Mask;

156

158

159

// Op is declared on a line that is not visible (presumably the immediate operand
// of MostRecentModeSet). Keep the immediate's bits in [ModeWidth, 2 * ModeWidth).
160 int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);

161

// Rewrite the immediate: merged mode in the low bits, preserved upper bits.
162 Op.setImm(CurrentMode | OldModeBits);

163 return true;

164 }

165

166

// Case 3: build a new instruction. The mode being replaced is placed in the
// bits directly above the new mode.
167 int64_t OldModeBits = CurrentMode << ModeWidth;

168

// handleClause() may return a different insertion point than the one passed in.
169 I = handleClause(I);

170 MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))

171 .addImm(NewMode | OldModeBits);

172

// Tracking restarts from exactly this request.
173 CurrentMode = NewMode;

174 CurrentMask = Mask;

175 return true;

176}

177

// Returns the hardware register index of MO's register shifted right by 8, or
// std::nullopt when one of the early-exit checks fires.
// NOTE(review): the lines declaring Reg and RC, and the condition guarding the
// first early return, are not visible in this listing.
178std::optional

179AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {

181 return std::nullopt;

182

// NOTE(review): as shown, nullopt is returned when RC *is* a VGPR class, so only
// non-VGPR registers would reach the index computation below. That looks
// inverted for a pass that encodes VGPR MSBs; a '!' may have been lost when this
// page was extracted. Confirm against the upstream file.
185 if (!RC || TRI->isVGPRClass(RC))

186 return std::nullopt;

187

// Discard the low 8 bits of the hardware index; what remains is the result.
188 unsigned Idx = TRI->getHWRegIndex(Reg);

189 return Idx >> 8;

190}

191

// Computes the mode (NewMode) and the mask of fields in use (Mask) for one
// instruction. Slot I is filled from the operand named Ops[I], or from Ops2[I]
// when a second table is given and Ops[I] yields no value.
// NOTE(review): the second signature line (the MachineInstr parameter) is not
// visible in this listing.
192void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,

194 const AMDGPU::OpName Ops[OpNum],

195 const AMDGPU::OpName *Ops2) {

// Start from an empty mode and an empty mask.
196 NewMode = {};

197 Mask = {};

198

199 for (unsigned I = 0; I < OpNum; ++I) {

201

// Op is declared on a line that is not visible (presumably the named operand
// for Ops[I], mirroring the Ops2 lookups below); it may be null.
202 std::optional MSBits;

203 if (Op)

204 MSBits = getMSBs(*Op);

205

// Debug-only check: when both tables give a value for this slot the two values
// are compared. The statement executed on a mismatch is not visible.
206#if !defined(NDEBUG)

207 if (MSBits.has_value() && Ops2) {

208 auto Op2 = TII->getNamedOperand(MI, Ops2[I]);

209 if (Op2) {

210 std::optional MSBits2;

211 MSBits2 = getMSBs(*Op2);

212 if (MSBits2.has_value() && MSBits != MSBits2)

214 }

215 }

216#endif

217

// Fall back to the second table when the first gave no value.
218 if (!MSBits.has_value() && Ops2) {

219 Op = TII->getNamedOperand(MI, Ops2[I]);

220 if (Op)

221 MSBits = getMSBs(*Op);

222 }

223

// Neither table produced a value: this slot leaves NewMode and Mask untouched.
224 if (!MSBits.has_value())

225 continue;

226

227

228

229

// Special case for src2 operands that skips the slot entirely.
// NOTE(review): two lines of this condition are not visible, so the full
// predicate (including whether Op->isDef() should be negated) cannot be
// confirmed from this listing; verify against the upstream file.
230 if (Ops[I] == AMDGPU::OpName::src2 && Op->isDef() && Op->isTied() &&

233 TII->hasVALU32BitEncoding(MI.getOpcode()))))

234 continue;

235

// Record the value for this slot and mark the whole field as in use.
236 NewMode[I] = MSBits.value();

237 Mask[I] = FieldMask;

238 }

239}

240

// Processes a single instruction. Returns the result of setMode() when the
// instruction has an operand table, false otherwise.
241bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {

// Ops is declared on a line that is not visible (presumably the pair of operand
// tables for MI's descriptor); a null first table means nothing to lower.
243 if (Ops.first) {

244 ModeTy NewMode, Mask;

245 computeMode(NewMode, Mask, MI, Ops.first, Ops.second);

246 return setMode(NewMode, Mask, MI.getIterator());

247 }

// NOTE(review): as shown, this asserts that an instruction without an operand
// table HAS VGPR uses (or is a meta/pseudo instruction). The opposite reads more
// plausibly; a '!' may have been lost when this page was extracted. Confirm
// against the upstream file.
248 assert(TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());

249

250 return false;

251}

252

// Body of the clause-handling helper that setMode() calls as handleClause(I) to
// choose where a new mode-set instruction goes; the signature lines are not
// visible in this listing. Returns the iterator to insert before.

// Not inside a clause: insert at the requested position.
255 if (!ClauseRemaining)

256 return I;

257

258

259

// ClauseRemaining still equals ClauseLen, i.e. no instruction of the clause has
// been processed yet: move the insertion point to the instruction preceding
// Clause. One statement of this branch is not visible.
260 if (ClauseRemaining == ClauseLen) {

261 I = Clause->getPrevNode()->getIterator();

263 return I;

264 }

265

266

267

// The clause has a non-zero breaks field: erase the Clause instruction and stop
// tracking the clause.
268 if (ClauseBreaks) {

269 Clause->eraseFromBundle();

270 ClauseRemaining = 0;

271 return I;

272 }

273

274

275

276

277

// Otherwise account for one more instruction in the clause. run() decodes the
// length as (imm & 63) + 1, so storing ClauseLen encodes a length of
// ClauseLen + 1. ClauseBreaks is zero here, so the shifted term adds nothing.
278 if (ClauseLen < 63)

279 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));

280

// The tracked length is incremented even when the immediate was not rewritten.
281 ++ClauseLen;

282

283 return I;

284}

285

// Body of the per-function driver that the wrappers invoke as
// AMDGPULowerVGPREncoding().run(MF). The signature, the declarations of ST and
// Changed, the per-instruction loop header and the final return statement are
// not visible in this listing.

// Nothing to do for subtargets without 1024 addressable VGPRs.
288 if (!ST.has1024AddressableVGPRs())

289 return false;

290

291 TII = ST.getInstrInfo();

292 TRI = ST.getRegisterInfo();

293

// Start with no open clause and an all-zero mode and mask.
295 ClauseLen = ClauseRemaining = 0;

296 CurrentMode.reset();

297 CurrentMask.reset();

298 for (auto &MBB : MF) {

// A mode-set instruction from a previous block is never patched.
299 MostRecentModeSet = nullptr;

301

// Meta instructions are ignored.
303 if (MI.isMetaInstruction())

304 continue;

305

// Terminators and calls: for S_ENDPGM / S_ENDPGM_SAVED only the tracked mode is
// cleared, no instruction is requested; otherwise the default mode is requested
// before the instruction.
306 if (MI.isTerminator() || MI.isCall()) {

307 if (MI.getOpcode() == AMDGPU::S_ENDPGM ||

308 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)

309 CurrentMode.reset();

310 else

311 resetMode(MI.getIterator());

312 continue;

313 }

314

// Inline asm: request the default mode first, but only if it has VGPR uses.
315 if (MI.isInlineAsm()) {

316 if (TII->hasVGPRUses(MI))

317 resetMode(MI.getIterator());

318 continue;

319 }

320

// S_CLAUSE: decode its immediate. Bits [8, 12) become ClauseBreaks and the low
// 6 bits plus one become the clause length. One statement of this branch is
// not visible.
321 if (MI.getOpcode() == AMDGPU::S_CLAUSE) {

322 assert(!ClauseRemaining && "Nested clauses are not supported");

323 ClauseLen = MI.getOperand(0).getImm();

324 ClauseBreaks = (ClauseLen >> 8) & 15;

325 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;

327 continue;

328 }

329

// Ordinary instruction: lower it, then count it against the open clause.
330 Changed |= runOnMachineInstr(MI);

331

332 if (ClauseRemaining)

333 --ClauseRemaining;

334 }

335

336

// Request the default mode at the end of every block.
337 resetMode(MBB.instr_end());

338 }

339

341}

342

// Public section of the legacy pass wrapper (AMDGPULowerVGPREncodingLegacy, per
// the ID definition later in the file); the class header and constructor lines
// are not visible in this listing.
344public:

// Pass identification member; defined as 0 after the anonymous namespace.
345 static char ID;

346

348

// Runs the lowering on MF using a freshly constructed worker object.
349 bool runOnMachineFunction(MachineFunction &MF) override {

350 return AMDGPULowerVGPREncoding().run(MF);

351 }

352

// NOTE(review): the body lines are not visible in this listing.
353 void getAnalysisUsage(AnalysisUsage &AU) const override {

356 }

357};

358

359}

360

// Out-of-class definition of the legacy pass's static ID member.
361char AMDGPULowerVGPREncodingLegacy::ID = 0;

362

364

// Trailing arguments of a pass-registration macro whose first line is not
// visible: the pass's display name followed by two false flags.
366 "AMDGPU Lower VGPR Encoding", false, false)

367

// Tail of a second entry point that returns a value named PA (presumably the
// new-pass-manager run method); its signature, the statement under the `if`
// and the lines that build PA are not visible in this listing.

// Run the lowering with a fresh worker; this branch is the "not changed" case.
371 if (!AMDGPULowerVGPREncoding().run(MF))

373

376 return PA;

377}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

Provides AMDGPU specific target descriptions.

AMD GCN specific subclass of TargetSubtarget.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Register const TargetRegisterInfo * TRI

This file implements the PackedVector class.

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

Interface definition for SIInstrInfo.

Represent the analysis usage information of a pass.

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

Represents analyses that only rely on functions' control flow.

Wrapper class representing physical registers. Should be passed by value.

Instructions::iterator instr_iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

Representation of each machine instruction.

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

Register getReg() const

getReg - Returns the register number.

Store a vector of values using a specific number of bits for each value.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

static bool isVOP2(const MachineInstr &MI)

static bool isVOP3(const MCInstrDesc &Desc)

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

DWARFExpression::Operation Op

char & AMDGPULowerVGPREncodingLegacyID

Definition AMDGPULowerVGPREncoding.cpp:363