LLVM: lib/Target/AMDGPU/SIModeRegister.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
21#include <queue>
22
23#define DEBUG_TYPE "si-mode-register"
24
25STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26
27using namespace llvm;
28
30
31
34
36
37 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
39 };
40
41
42
45 }
46
47
48
51 }
52
53
54
57 unsigned NewMode = (Mode & NewMask);
58 return Status(NewMask, NewMode);
59 }
60
61
64 }
65
68 }
69
71
74 }
75
77};
78
80public:
81
82
84
85
86
88
89
90
92
93
94
96
97
98
100
101
102
104
106};
107
// File-local scope holding the members of the SIModeRegister pass.
// NOTE(review): the class header line and several member declarations are
// absent from this listing (bare listing numbers mark the gaps); confirm
// against the original file before relying on this view.
108namespace {
109
111public:
// Pass identification member; defined as 0 after this namespace closes.
112 static char ID;
113
// Per-block analysis records, indexed by MachineBasicBlock number.
// NOTE(review): the element type inside unique_ptr appears to have been lost
// in extraction -- confirm.
114 std::vector<std::unique_ptr> BlockInfo;
// Work queue of blocks awaiting phase 2 processing; runOnMachineFunction
// drains it from the front until it is empty.
115 std::queue<MachineBasicBlock *> Phase2List;
116
117
118
119
120
121
// Status returned by the default case of the opcode switch and assigned as a
// block's predecessor status in phase 2. NOTE(review): the initializer is not
// visible in this listing.
123 Status DefaultStatus =
125
// Set to true where an S_SETREG_IMM32_B32 is built; this is the value that
// runOnMachineFunction returns.
126 bool Changed = false;
127
128public:
130
132
136 }
137
139
141
143
145
148};
149}
150
152 "Insert required mode register values", false, false)
153
154char SIModeRegister::ID = 0;
155
157
159
160
161
162
163
166 unsigned Opcode = MI.getOpcode();
167 if (TII->usesFPDPRounding(MI) ||
168 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
169 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
170 switch (Opcode) {
171 case AMDGPU::V_INTERP_P1LL_F16:
172 case AMDGPU::V_INTERP_P1LV_F16:
173 case AMDGPU::V_INTERP_P2_F16:
174
177 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
178 unsigned Mode = MI.getOperand(2).getImm();
179 MI.removeOperand(2);
180
181 if (TII->getSubtarget().hasTrue16BitInsts()) {
184 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
186 MI.removeOperand(1);
187 B.addImm(0);
188 B.add(Src0);
189 B.addImm(0);
190 B.addImm(0);
191 } else
192 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
194 }
195 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
196 unsigned Mode = MI.getOperand(2).getImm();
197 MI.removeOperand(2);
198 MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
200 }
201 default:
202 return DefaultStatus;
203 }
204 }
206}
207
208
209
210
211
212
215 while (InstrMode.Mask) {
216 unsigned Offset = llvm::countr_zero(InstrMode.Mask);
217 unsigned Width = llvm::countr_one(InstrMode.Mask >> Offset);
218 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
219 using namespace AMDGPU::Hwreg;
220 BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
222 .addImm(HwregEncoding::encode(ID_MODE, Offset, Width));
223 ++NumSetregInserted;
224 Changed = true;
225 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
226 }
227}
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
250 auto NewInfo = std::make_unique();
252
253
254
255
256
257 bool RequirePending = true;
260 Status InstrMode = getInstructionMode(MI, TII);
261 if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
262 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
263 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
264 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
265
266
267
268 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
269 using namespace AMDGPU::Hwreg;
270 auto [Id, Offset, Width] = HwregEncoding::decode(Dst);
271 if (Id != ID_MODE)
272 continue;
273
274 unsigned Mask = maskTrailingOnes(Width) << Offset;
275
276
280 }
281
282
283
284 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
285 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
286 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
287 unsigned Mode = (Val << Offset) & Mask;
289
290
291 RequirePending = false;
292 NewInfo->Change = NewInfo->Change.merge(Setreg);
293 } else {
294 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
295 }
296 } else if (!NewInfo->Change.isCompatible(InstrMode)) {
297
298
300
301
302
304 if (RequirePending) {
305
306
307
309 NewInfo->Require = NewInfo->Change;
310 RequirePending = false;
311 } else {
313 IPChange.delta(NewInfo->Change));
314 IPChange = NewInfo->Change;
315 }
316
318 }
319 NewInfo->Change = NewInfo->Change.merge(InstrMode);
320 } else {
321
322
324 IPChange = NewInfo->Change;
325 NewInfo->Change = NewInfo->Change.merge(InstrMode);
326 }
327 }
328 }
329 if (RequirePending) {
330
331
333 NewInfo->Require = NewInfo->Change;
335
337 }
338 NewInfo->Exit = NewInfo->Change;
339 BlockInfo[MBB.getNumber()] = std::move(NewInfo);
340}
341
342
343
344
345
348 bool RevisitRequired = false;
349 bool ExitSet = false;
352
353 BlockInfo[ThisBlock]->Pred = DefaultStatus;
354 ExitSet = true;
355 } else {
356
357
358
359
360
361
362
363
364
365
368 unsigned PredBlock = PB.getNumber();
369 if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
370 BlockInfo[ThisBlock]->Pred = DefaultStatus;
371 ExitSet = true;
372 } else if (BlockInfo[PredBlock]->ExitSet) {
373 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
374 ExitSet = true;
375 } else if (PredBlock != ThisBlock)
376 RevisitRequired = true;
377
378 for (P = std::next(P); P != E; P = std::next(P)) {
380 unsigned PredBlock = Pred->getNumber();
381 if (BlockInfo[PredBlock]->ExitSet) {
382 if (BlockInfo[ThisBlock]->ExitSet) {
383 BlockInfo[ThisBlock]->Pred =
384 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
385 } else {
386 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
387 }
388 ExitSet = true;
389 } else if (PredBlock != ThisBlock)
390 RevisitRequired = true;
391 }
392 }
394 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
395 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
396 BlockInfo[ThisBlock]->Exit = TmpStatus;
397
398
400 Phase2List.push(Succ);
401 }
402 BlockInfo[ThisBlock]->ExitSet = ExitSet;
403 if (RevisitRequired)
404 Phase2List.push(&MBB);
405}
406
407
408
409
413 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
415 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
416 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
417 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
418 else
420 }
421}
422
// Runs the pass over one machine function: phase 1, then the phase 2 work
// queue, then phase 3, and finally reports whether anything was changed.
// Returns the Changed member.
// NOTE(review): the declarations of F and TII and the loop headers that
// introduce BB are missing from this listing; comments below only describe
// the statements that are visible.
423bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
424
425
426
427
428
// Functions carrying the StrictFP attribute are left alone: return before
// any phase runs.
430 if (F.hasFnAttribute(llvm::Attribute::StrictFP))
431 return Changed;
435
436
437
438
439
// Phase 1 on block BB (presumably once per block of MF -- the loop header is
// not visible; confirm).
441 processBlockPhase1(BB, TII);
442
443
444
445
// Seed the phase 2 queue with BB, then process the queue front-to-back until
// it is empty. Phase 2 may push further blocks onto the same queue.
447 Phase2List.push(&BB);
448 while (!Phase2List.empty()) {
449 processBlockPhase2(*Phase2List.front(), TII);
450 Phase2List.pop();
451 }
452
453
454
// Phase 3 on block BB (loop header likewise not visible).
456 processBlockPhase3(BB, TII);
457
// Drop the per-block records before returning.
458 BlockInfo.clear();
459
460 return Changed;
461}
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
LLVM Value Representation.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)