LLVM: lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp Source File
//===- AMDGPULowerVGPREncoding.cpp - Lower VGPR encoding above v255 ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// On subtargets with 1024 addressable VGPRs, instruction encodings only have
/// room for the low 8 bits of a VGPR number. The two most significant bits of
/// each VGPR operand are supplied by a mode set with S_SET_VGPR_MSB. This pass
/// inserts and merges S_SET_VGPR_MSB instructions so that every instruction
/// addressing VGPRs above v255 executes under the correct mode.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/PackedVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"

namespace {

class AMDGPULowerVGPREncoding {
  static constexpr unsigned OpNum = 4;
  static constexpr unsigned BitsPerField = 2;
  static constexpr unsigned NumFields = 4;
  static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
  static constexpr unsigned ModeWidth = NumFields * BitsPerField;
  static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;

  using ModeType = PackedVector<unsigned, BitsPerField,
                                std::bitset<BitsPerField * NumFields>>;

  /// Packed mode: one BitsPerField-wide field of VGPR MSBs per tracked
  /// operand.
  class ModeTy : public ModeType {
  public:
    ModeTy() : ModeType(0) {}

    operator int64_t() const { return raw_bits().to_ulong(); }

    static ModeTy fullMask() {
      ModeTy M;
      M.raw_bits().flip();
      return M;
    }
  };

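  // Layout note (a summary of the constants above): the mode is NumFields (4)
  // fields of BitsPerField (2) bits each, i.e. an 8-bit value, one field per
  // tracked VGPR operand. For example, an operand in v256..v511 has MSBs
  // 256 >> 8 == 1, so its field holds 0b01; which operand maps to which field
  // is determined by the operand tables consumed in computeMode() below.
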
public:
  bool run(MachineFunction &MF);

private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

  /// Basic block currently being processed.
  MachineBasicBlock *MBB;

  /// Most recent S_SET_VGPR_MSB inserted, or nullptr if none is live.
  MachineInstr *MostRecentModeSet;

  /// Mode currently in effect.
  ModeTy CurrentMode;

  /// Mask of the mode fields used by instructions since the last mode switch.
  ModeTy CurrentMask;

  /// Length of the current clause, in instructions.
  unsigned ClauseLen;

  /// Number of clause instructions not yet processed.
  unsigned ClauseRemaining;

  /// Number of breaks in the current clause.
  unsigned ClauseBreaks;

  /// The S_CLAUSE instruction of the current clause, if any.
  MachineInstr *Clause;

  /// Switch to \p NewMode before \p I, restricted to the fields selected by
  /// \p Mask. Returns true if an instruction was inserted or updated.
  bool setMode(ModeTy NewMode, ModeTy Mask,
               MachineBasicBlock::instr_iterator I);

  /// Reset the mode to the default (all MSBs zero) before \p I.
  void resetMode(MachineBasicBlock::instr_iterator I) {
    setMode(ModeTy(), ModeTy::fullMask(), I);
  }

  /// Return the MSBs of a VGPR operand, or std::nullopt if \p MO is not a
  /// VGPR.
  std::optional<unsigned> getMSBs(const MachineOperand &MO) const;

  /// Process a single instruction. Returns true if a mode switch was needed.
  bool runOnMachineInstr(MachineInstr &MI);

  /// Compute the mode and mask required by the VGPR operands of \p MI named
  /// by \p Ops, falling back to the corresponding entries of \p Ops2 when an
  /// operand from \p Ops is absent.
  void computeMode(ModeTy &NewMode, ModeTy &Mask, MachineInstr &MI,
                   const AMDGPU::OpName Ops[OpNum],
                   const AMDGPU::OpName *Ops2 = nullptr);

  /// Account for an instruction about to be inserted before \p I while inside
  /// a clause; adjusts or drops the clause and returns the insertion point.
  MachineBasicBlock::instr_iterator
  handleClause(MachineBasicBlock::instr_iterator I);
};

bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
                                      MachineBasicBlock::instr_iterator I) {
  assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());

  auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();

  // Nothing changes within the masked fields: just record that these fields
  // are now relied upon.
  if ((Delta & Mask.raw_bits()).none()) {
    CurrentMask |= Mask;
    return false;
  }

  // If no instruction has used the differing fields since the last mode
  // switch, fold the change into the most recent S_SET_VGPR_MSB.
  if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
    CurrentMode |= NewMode;
    CurrentMask |= Mask;

    MachineOperand &Op = MostRecentModeSet->getOperand(0);

    // Preserve the previous mode recorded in the upper half of the immediate.
    int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);

    Op.setImm(CurrentMode | OldModeBits);
    return true;
  }

  // Otherwise emit a new S_SET_VGPR_MSB, recording the outgoing mode in the
  // upper half of the immediate.
  int64_t OldModeBits = CurrentMode << ModeWidth;

  I = handleClause(I);
  MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                          .addImm(NewMode | OldModeBits);

  CurrentMode = NewMode;
  CurrentMask = Mask;
  return true;
}

std::optional<unsigned>
AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {
  if (!MO.isReg())
    return std::nullopt;

  Register Reg = MO.getReg();
  const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
  if (!RC || !TRI->isVGPRClass(RC))
    return std::nullopt;

  unsigned Idx = TRI->getHWRegIndex(Reg);
  return Idx >> 8;
}

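// Example of the mapping above: v17 has hardware index 17, so getMSBs()
// returns 17 >> 8 == 0, while v300 has hardware index 300 and yields
// 300 >> 8 == 1. Non-VGPR operands (SGPRs, immediates) yield std::nullopt and
// therefore do not constrain the mode.
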
void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,
                                          MachineInstr &MI,
                                          const AMDGPU::OpName Ops[OpNum],
                                          const AMDGPU::OpName *Ops2) {
  NewMode = {};
  Mask = {};

  for (unsigned I = 0; I < OpNum; ++I) {
    MachineOperand *Op = TII->getNamedOperand(MI, Ops[I]);

    std::optional<unsigned> MSBits;
    if (Op)
      MSBits = getMSBs(*Op);

#if !defined(NDEBUG)
    // A paired operand from Ops2 must agree on the MSBs.
    if (MSBits.has_value() && Ops2) {
      auto Op2 = TII->getNamedOperand(MI, Ops2[I]);
      if (Op2) {
        std::optional<unsigned> MSBits2;
        MSBits2 = getMSBs(*Op2);
        if (MSBits2.has_value() && MSBits != MSBits2)
          llvm_unreachable("Paired operands have different VGPR MSBs");
      }
    }
#endif

    if (!MSBits.has_value() && Ops2) {
      Op = TII->getNamedOperand(MI, Ops2[I]);
      if (Op)
        MSBits = getMSBs(*Op);
    }

    if (!MSBits.has_value())
      continue;

    // A tied src2 of an instruction with a 32-bit VALU encoding is not
    // encoded separately; it shares its register (and hence its MSBs) with
    // the destination, so it does not constrain the mode.
    if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
        (SIInstrInfo::isVOP2(MI) ||
         (SIInstrInfo::isVOP3(MI.getDesc()) &&
          TII->hasVALU32BitEncoding(MI.getOpcode()))))
      continue;

    NewMode[I] = MSBits.value();
    Mask[I] = FieldMask;
  }
}

bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
  auto Ops = AMDGPU::getVGPRLoweringOperandTables(MI.getDesc());
  if (Ops.first) {
    ModeTy NewMode, Mask;
    computeMode(NewMode, Mask, MI, Ops.first, Ops.second);
    return setMode(NewMode, Mask, MI.getIterator());
  }
  assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());

  return false;
}

MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
  if (!ClauseRemaining)
    return I;

  // We are still at the head of the clause; move the insertion point back so
  // the mode switch lands ahead of the S_CLAUSE rather than inside the clause.
  if (ClauseRemaining == ClauseLen) {
    I = Clause->getPrevNode()->getIterator();
    return I;
  }

  // A clause with breaks cannot simply be extended; drop it instead of
  // emitting an incorrectly sized clause.
  if (ClauseBreaks) {
    Clause->eraseFromBundle();
    ClauseRemaining = 0;
    return I;
  }

  // Extend the clause to cover the instruction about to be inserted. The low
  // bits of the S_CLAUSE immediate encode the clause length minus one.
  if (ClauseLen < 63)
    Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));

  ++ClauseLen;

  return I;
}

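// Worked example of the S_CLAUSE immediate updated above and decoded in run()
// below: an immediate of 0x107 encodes (0x107 & 63) + 1 == 8 instructions and
// (0x107 >> 8) & 15 == 1 break. Growing a break-free clause by one simply
// rewrites the low bits with the new length minus one.
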
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.has1024AddressableVGPRs())
    return false;

  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  bool Changed = false;
  ClauseLen = ClauseRemaining = 0;
  CurrentMode.reset();
  CurrentMask.reset();
  for (auto &MBB : MF) {
    MostRecentModeSet = nullptr;
    this->MBB = &MBB;

    for (auto &MI : make_early_inc_range(MBB.instrs())) {
      if (MI.isMetaInstruction())
        continue;

      if (MI.isTerminator() || MI.isCall()) {
        if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
            MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
          CurrentMode.reset();
        else
          resetMode(MI.getIterator());
        continue;
      }

      if (MI.isInlineAsm()) {
        // Be conservative: inline asm may address any VGPR.
        resetMode(MI.getIterator());
        continue;
      }

      if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
        assert(!ClauseRemaining && "Nested clauses are not supported");
        ClauseLen = MI.getOperand(0).getImm();
        ClauseBreaks = (ClauseLen >> 8) & 15;
        ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
        Clause = &MI;
        continue;
      }

      Changed |= runOnMachineInstr(MI);

      if (ClauseRemaining)
        --ClauseRemaining;
    }

    // Restore the default mode at the end of every block so successors can
    // assume it.
    resetMode(MBB.instr_end());
  }

  return Changed;
}

class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {
public:
  static char ID;

  AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    return AMDGPULowerVGPREncoding().run(MF);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // namespace

char AMDGPULowerVGPREncodingLegacy::ID = 0;

char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;

INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,
                "AMDGPU Lower VGPR Encoding", false, false)

PreservedAnalyses
AMDGPULowerVGPREncodingPass::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  if (!AMDGPULowerVGPREncoding().run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
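// Illustrative effect of the pass (a sketch; the printed MIR syntax and the
// exact packing of the immediate are assumptions, not taken from this file):
// an instruction reading a VGPR above v255, such as a move from v300, gets a
// preceding mode switch,
//
//   S_SET_VGPR_MSB <packed MSB fields for the instruction's VGPR operands>
//   $vgpr44 = V_MOV_B32_e32 $vgpr300, implicit $exec
//
// and a later instruction whose VGPR operands are all in v0..v255 causes the
// affected fields to be switched back to zero, or has the change folded into
// the previous S_SET_VGPR_MSB when no intervening instruction relied on it.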