LLVM: lib/Target/AMDGPU/SIModeRegister.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
21#include
22
23#define DEBUG_TYPE "si-mode-register"
24
25STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26
27using namespace llvm;
28
30
31
34
36
37 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
39 };
40
41
42
46
47
48
52
53
54
57 unsigned NewMode = (Mode & NewMask);
58 return Status(NewMask, NewMode);
59 }
60
61
65
69
71
75
77};
78
80public:
81
82
84
85
86
88
89
90
92
93
94
96
97
98
100
101
102
104
106};
107
108namespace {
109
// Implementation object for the mode-register pass: it holds the per-block
// records and the phase-2 worklist that the processBlockPhase* methods operate
// on.
// NOTE(review): this listing has lost most of the member declarations (the
// numbering skips lines and the initializer of DefaultStatus is cut off);
// only the members that are still visible are described here.
110class SIModeRegister {
111public:
// Per-block records, indexed by MachineBasicBlock::getNumber(). The element
// type's template argument was lost in extraction.
112 std::vector<std::unique_ptr> BlockInfo;
// Worklist of blocks awaiting (re)processing by processBlockPhase2; entries
// are consumed front-first.
113 std::queue<MachineBasicBlock *> Phase2List;
114
115
116
117
118
119
// Status that phase 2 assigns as a block's incoming (Pred) status in some
// cases, and that getInstructionMode returns from its default switch case.
// NOTE(review): the initializer expression is missing from this listing.
121 Status DefaultStatus =
123
125
127
129
131
133
135
138};
139
141public:
142 static char ID;
143
144 SIModeRegisterLegacy() : MachineFunctionPass(ID) {}
145
146 bool runOnMachineFunction(MachineFunction &MF) override;
147
148 void getAnalysisUsage(AnalysisUsage &AU) const override {
151 }
152};
153}
154
156 "Insert required mode register values", false, false)
157
158char SIModeRegisterLegacy::ID = 0;
159
161
163 return new SIModeRegisterLegacy();
164}
165
166
167
168
169
// Body of the routine that determines the mode-register Status required by a
// single instruction.
// NOTE(review): the signature line is missing from this listing; the call
// site in processBlockPhase1 shows it is invoked as
// getInstructionMode(MI, TII) and yields a Status.
172 unsigned Opcode = MI.getOpcode();
// Only instructions for which TII->usesFPDPRounding(MI) holds, or one of the
// four FPTRUNC_ROUND_* pseudos, are examined further.
173 if (TII->usesFPDPRounding(MI) ||
174 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
175 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
176 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
177 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
178 switch (Opcode) {
// NOTE(review): the statement executed for the V_INTERP_* cases (listing
// lines 183-184) is missing here.
179 case AMDGPU::V_INTERP_P1LL_F16:
180 case AMDGPU::V_INTERP_P1LV_F16:
181 case AMDGPU::V_INTERP_P2_F16:
182
// Each FPTRUNC_ROUND_* pseudo carries an immediate operand (index 2, or 6 for
// the t16_e64 form) that is read into Mode and then removed, after which the
// instruction is rewritten in place to the matching V_CVT_* opcode.
// NOTE(review): the statement that follows each setDesc call is missing from
// this listing; presumably it returns a Status built from Mode - confirm
// against upstream.
185 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
186 unsigned Mode = MI.getOperand(2).getImm();
187 MI.removeOperand(2);
188 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
190 }
191 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
192 unsigned Mode = MI.getOperand(2).getImm();
193 MI.removeOperand(2);
194 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
196 }
197 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
198 unsigned Mode = MI.getOperand(6).getImm();
199 MI.removeOperand(6);
200 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
202 }
203 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
204 unsigned Mode = MI.getOperand(2).getImm();
205 MI.removeOperand(2);
206 MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
208 }
// Any other opcode that passed the outer test gets DefaultStatus.
209 default:
210 return DefaultStatus;
211 }
212 }
// Instructions that fail the outer test get a default-constructed Status.
213 return Status();
214}
215
216
217
218
219
220
// Emits the setreg instruction(s) needed to apply InstrMode to the MODE
// hardware register.
//   MBB       - block receiving the new instruction(s).
//   MI        - instruction passed along as the insertion position.
//   TII       - target instruction info.
//   InstrMode - Mask selects the mode bits to write, Mode supplies their
//               values; taken by value because Mask is consumed below.
// NOTE(review): the definitions of Offset and Width (listing lines 224-225)
// and the start of the instruction-building expression (228-229) are missing
// here; presumably Offset/Width describe one contiguous run of set bits in
// Mask - confirm against upstream.
221void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
222 const SIInstrInfo *TII, Status InstrMode) {
// Loop until every bit of the mask has been handled.
223 while (InstrMode.Mask) {
// Extract the Width-bit field of Mode that starts at bit Offset.
226 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
227 using namespace AMDGPU::Hwreg;
// The field is addressed as (ID_MODE, Offset, Width) in hwreg encoding.
230 .addImm(HwregEncoding::encode(ID_MODE, Offset, Width));
// Statistic: one more setreg of the mode register inserted.
231 ++NumSetregInserted;
// Clear the bits just handled so the loop makes progress.
233 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
234 }
235}
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
// Phase 1: scans one basic block in instruction order and builds its
// per-block record, which is stored in BlockInfo[MBB.getNumber()].
// While scanning it accumulates the block's mode changes in Change, records
// Require / FirstInsertionPoint for the block's first requirement, and calls
// insertSetreg for later mode changes inside the block. On exit, Exit is set
// to the accumulated Change.
//   MBB - block to scan.
//   TII - target instruction info, used for operand lookup and forwarded to
//         getInstructionMode / insertSetreg.
// NOTE(review): the original comments and listing lines 282 and 311 are
// missing from this listing; the notes below flag where that matters.
256void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
257 const SIInstrInfo *TII) {
// Fresh record for this block (template argument lost in extraction).
258 auto NewInfo = std::make_unique();
// Instruction before which a pending setreg would be placed; null when no
// insertion is pending.
259 MachineInstr *InsertionPoint = nullptr;
260
261
262
263
264
// True until the block's Require has been recorded or an immediate setreg of
// the MODE register has been seen.
265 bool RequirePending = true;
// Snapshot of NewInfo->Change taken when InsertionPoint was last set; the
// setreg to emit is IPChange.delta(NewInfo->Change).
266 Status IPChange;
267 for (MachineInstr &MI : MBB) {
268 Status InstrMode = getInstructionMode(MI, TII);
// Case 1: an explicit setreg instruction already present in the code.
269 if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
270 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
271 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
272 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
273
274
275
// Decode the simm16 operand into (Id, Offset, Width); setregs that target a
// hardware register other than MODE are ignored.
276 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
277 using namespace AMDGPU::Hwreg;
278 auto [Id, Offset, Width] = HwregEncoding::decode(Dst);
279 if (Id != ID_MODE)
280 continue;
281
// NOTE(review): listing line 282 is missing; Mask, used below, is presumably
// declared there from Offset and Width - confirm against upstream.
283
284
// Flush any pending insertion before accounting for the explicit setreg.
285 if (InsertionPoint) {
286 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
287 InsertionPoint = nullptr;
288 }
289
290
291
// Immediate forms: the written value is known, so it is merged into Change as
// a concrete Status and no Require needs to be recorded afterwards.
292 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
293 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
294 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
295 unsigned Mode = (Val << Offset) & Mask;
296 Status Setreg = Status(Mask, Mode);
297
298
299 RequirePending = false;
300 NewInfo->Change = NewInfo->Change.merge(Setreg);
301 } else {
// Non-immediate forms: the affected bits are merged via mergeUnknown.
302 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
303 }
// Case 2: the accumulated Change is reported as not compatible with what this
// instruction needs.
304 } else if (!NewInfo->Change.isCompatible(InstrMode)) {
305
306
307 if (InsertionPoint) {
308
309
310
// NOTE(review): listing line 311 is missing. The visible braces do not
// balance without it, so an additional condition presumably opens here and
// encloses the block down to "InsertionPoint = &MI;" - confirm against
// upstream.
312 if (RequirePending) {
313
314
315
// First requirement of the block: remember where a setreg could go and what
// is required, instead of emitting it now.
316 NewInfo->FirstInsertionPoint = InsertionPoint;
317 NewInfo->Require = NewInfo->Change;
318 RequirePending = false;
319 } else {
// Later requirement: emit the pending change and re-base IPChange.
320 insertSetreg(MBB, InsertionPoint, TII,
321 IPChange.delta(NewInfo->Change));
322 IPChange = NewInfo->Change;
323 }
324
// This instruction becomes the new insertion point.
325 InsertionPoint = &MI;
326 }
327 NewInfo->Change = NewInfo->Change.merge(InstrMode);
328 } else {
329
330
// Presumably the else of "if (InsertionPoint)": nothing is pending, so start
// a new insertion point at this instruction and snapshot Change first.
331 InsertionPoint = &MI;
332 IPChange = NewInfo->Change;
333 NewInfo->Change = NewInfo->Change.merge(InstrMode);
334 }
335 }
336 }
// End of block: either record the still-pending requirement, or emit the
// setreg for the last pending insertion point.
337 if (RequirePending) {
338
339
340 NewInfo->FirstInsertionPoint = InsertionPoint;
341 NewInfo->Require = NewInfo->Change;
342 } else if (InsertionPoint) {
343
344 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
345 }
// Initial exit status is the accumulated change; phase 2 may update it.
346 NewInfo->Exit = NewInfo->Change;
347 BlockInfo[MBB.getNumber()] = std::move(NewInfo);
348}
349
350
351
352
353
// Phase 2: computes this block's incoming status (Pred) from its
// predecessors' Exit values, then recomputes its own Exit as
// Pred.merge(Change). When Exit changes, blocks are pushed onto Phase2List;
// the block itself is re-queued when a predecessor other than itself has not
// had its exit status set yet.
//   MBB - block to process.
//   TII - unused in the visible lines.
// NOTE(review): listing lines 358-359, 374 and 407 are missing. They
// presumably declare ThisBlock, open the "if" whose else appears below,
// declare the predecessor iterators P and E, and open the loop that yields
// Succ - confirm against upstream.
354void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
355 const SIInstrInfo *TII) {
// Set when some predecessor without a known exit status forces a later
// revisit of this block.
356 bool RevisitRequired = false;
// Set once this block's Pred has been given a value in this invocation.
357 bool ExitSet = false;
360
// Branch whose condition is missing from the listing: the incoming status is
// DefaultStatus.
361 BlockInfo[ThisBlock]->Pred = DefaultStatus;
362 ExitSet = true;
363 } else {
364
365
366
367
368
369
370
371
372
373
// First predecessor, handled separately from the rest.
375 MachineBasicBlock &PB = *(*P);
376 unsigned PredBlock = PB.getNumber();
// The only predecessor is the block itself: use DefaultStatus.
377 if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
378 BlockInfo[ThisBlock]->Pred = DefaultStatus;
379 ExitSet = true;
380 } else if (BlockInfo[PredBlock]->ExitSet) {
// The predecessor's exit status is known: start from it.
381 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
382 ExitSet = true;
383 } else if (PredBlock != ThisBlock)
384 RevisitRequired = true;
385
// Remaining predecessors: combine each known Exit into Pred.
386 for (P = std::next(P); P != E; P = std::next(P)) {
387 MachineBasicBlock *Pred = *P;
388 unsigned PredBlock = Pred->getNumber();
389 if (BlockInfo[PredBlock]->ExitSet) {
// This tests the flag stored in BlockInfo, not the local ExitSet variable.
390 if (BlockInfo[ThisBlock]->ExitSet) {
391 BlockInfo[ThisBlock]->Pred =
392 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
393 } else {
394 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
395 }
396 ExitSet = true;
397 } else if (PredBlock != ThisBlock)
398 RevisitRequired = true;
399 }
400 }
// New exit status is the incoming status with this block's changes applied.
401 Status TmpStatus =
402 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
403 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
404 BlockInfo[ThisBlock]->Exit = TmpStatus;
405
406
// Exit changed, so Succ is queued for reprocessing (loop header missing).
408 Phase2List.push(Succ);
409 }
410 BlockInfo[ThisBlock]->ExitSet = ExitSet;
411 if (RevisitRequired)
412 Phase2List.push(&MBB);
413}
414
415
416
417
// Phase 3: if the block's incoming status (Pred) is reported as not
// compatible with what the block requires (Require), inserts setreg(s) for
// the difference, Pred.delta(Require).
//   MBB - block to fix up.
//   TII - forwarded to insertSetreg.
// NOTE(review): listing line 420 (presumably the declaration of ThisBlock)
// and line 427 (the statement of the else branch, used when no
// FirstInsertionPoint was recorded) are missing - confirm against upstream.
418void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
419 const SIInstrInfo *TII) {
421 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
422 Status Delta =
423 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
// Use the insertion point recorded in phase 1 when there is one.
424 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
425 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
426 else
428 }
429}
430
// Legacy pass-manager entry point: runs a freshly constructed SIModeRegister
// on MF and returns the result of its run() unchanged.
431bool SIModeRegisterLegacy::runOnMachineFunction(MachineFunction &MF) {
432 return SIModeRegister().run(MF);
433}
434
437 if (!SIModeRegister().run(MF))
441 return PA;
442}
443
// Body of the pass driver: it runs the three processing phases and then
// clears the per-block data.
// NOTE(review): the signature line and several statements are missing from
// this listing (the numbering skips 444, 450, 452-455, 461, 467, 476 and
// 481). The callers in this file invoke it as SIModeRegister().run(MF) and
// use the result as a boolean. The definitions of F, TII and BB, the loop
// headers, and the return statements are not visible here.
445
446
447
448
449
// Functions carrying the StrictFP attribute get special handling; the
// statement controlled by this condition is missing from the listing.
451 if (F.hasFnAttribute(llvm::Attribute::StrictFP))
456
457
458
459
460
// Phase 1: per-block scan (see processBlockPhase1).
462 processBlockPhase1(BB, TII);
463
464
465
466
// Phase 2: seed the worklist, then process entries front-first until the
// worklist is empty; processBlockPhase2 may push further blocks.
468 Phase2List.push(&BB);
469 while (!Phase2List.empty()) {
470 processBlockPhase2(*Phase2List.front(), TII);
471 Phase2List.pop();
472 }
473
474
475
// Phase 3: per-block fix-up (see processBlockPhase3).
477 processBlockPhase3(BB, TII);
478
// Drop the per-block records once all phases are done.
479 BlockInfo.clear();
480
482}
const TargetInstrInfo & TII
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Status Change
Definition SIModeRegister.cpp:87
Status Pred
Definition SIModeRegister.cpp:95
MachineInstr * FirstInsertionPoint
Definition SIModeRegister.cpp:99
Status Exit
Definition SIModeRegister.cpp:91
Status Require
Definition SIModeRegister.cpp:83
bool ExitSet
Definition SIModeRegister.cpp:103
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)
Definition SIModeRegister.cpp:435
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
Definition SIModeRegister.cpp:162
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition SIModeRegister.cpp:29
Status delta(const Status &S) const
Definition SIModeRegister.cpp:62
Status(unsigned NewMask, unsigned NewMode)
Definition SIModeRegister.cpp:37
bool isCombinable(Status &S)
Definition SIModeRegister.cpp:76
bool operator==(const Status &S) const
Definition SIModeRegister.cpp:66
bool isCompatible(Status &S)
Definition SIModeRegister.cpp:72
Status merge(const Status &S) const
Definition SIModeRegister.cpp:43
Status intersect(const Status &S) const
Definition SIModeRegister.cpp:55
bool operator!=(const Status &S) const
Definition SIModeRegister.cpp:70
unsigned Mask
Definition SIModeRegister.cpp:32
unsigned Mode
Definition SIModeRegister.cpp:33
Status mergeUnknown(unsigned newMask)
Definition SIModeRegister.cpp:49