LLVM: lib/Target/AMDGPU/R600MachineScheduler.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
17
18using namespace llvm;
19
20#define DEBUG_TYPE "machine-scheduler"
21
26 TII = static_cast<const R600InstrInfo*>(DAG->TII);
28 VLIW5 = !ST.hasCaymanISA();
29 MRI = &DAG->MRI;
30 CurInstKind = IDOther;
31 CurEmitted = 0;
32 OccupiedSlotsMask = 31;
33 InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
34 InstKindLimit[IDOther] = 32;
35 InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
36 AluInstCount = 0;
37 FetchInstCount = 0;
38}
39
40void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
41 std::vector<SUnit *> &QDst)
42{
44 QSrc.clear();
45}
46
/// Return how many wavefronts can be resident when each wavefront needs
/// \p GPRCount registers; the R600 register file provides 248 usable GPRs.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert (GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
51
53 SUnit *SU = nullptr;
54 NextInstKind = IDOther;
55
56 IsTopNode = false;
57
58
59 bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
60 (Available[CurInstKind].empty());
61 bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
62 (!Available[IDFetch].empty() || !Available[IDOther].empty());
63
64 if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
65
66
67
68
69 float ALUFetchRationEstimate =
70 (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
71 (FetchInstCount + Available[IDFetch].size());
72 if (ALUFetchRationEstimate == 0) {
73 AllowSwitchFromAlu = true;
74 } else {
75 unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
76 LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
77
78
79
80
81
82
83
84
85
86
87 unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
89 AllowSwitchFromAlu = true;
90 }
91 }
92
93 if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
94 (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
95
96 SU = pickAlu();
97 if (!SU && !PhysicalRegCopy.empty()) {
98 SU = PhysicalRegCopy.front();
99 PhysicalRegCopy.erase(PhysicalRegCopy.begin());
100 }
101 if (SU) {
102 if (CurEmitted >= InstKindLimit[IDAlu])
103 CurEmitted = 0;
104 NextInstKind = IDAlu;
105 }
106 }
107
108 if (!SU) {
109
110 SU = pickOther(IDFetch);
111 if (SU)
112 NextInstKind = IDFetch;
113 }
114
115
116 if (!SU) {
117 SU = pickOther(IDOther);
118 if (SU)
119 NextInstKind = IDOther;
120 }
121
123 dbgs() << " ** Pick node **\n";
124 DAG->dumpNode(*SU);
125 } else {
126 dbgs() << "NO NODE \n";
127 for (const SUnit &S : DAG->SUnits)
129 DAG->dumpNode(S);
130 });
131
132 return SU;
133}
134
136 if (NextInstKind != CurInstKind) {
138 if (NextInstKind != IDAlu)
139 OccupiedSlotsMask |= 31;
140 CurEmitted = 0;
141 CurInstKind = NextInstKind;
142 }
143
144 if (CurInstKind == IDAlu) {
145 AluInstCount ++;
146 switch (getAluKind(SU)) {
147 case AluT_XYZW:
148 CurEmitted += 4;
149 break;
150 case AluDiscarded:
151 break;
152 default: {
153 ++CurEmitted;
157 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
158 ++CurEmitted;
159 }
160 }
161 }
162 } else {
163 ++CurEmitted;
164 }
165
166 LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
167
168 if (CurInstKind != IDFetch) {
169 MoveUnits(Pending[IDFetch], Available[IDFetch]);
170 } else
171 FetchInstCount++;
172}
173
174static bool
176 if (MI->getOpcode() != R600::COPY)
177 return false;
178
179 return ->getOperand(1).getReg().isVirtual();
180}
181
185
187 LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
189 PhysicalRegCopy.push_back(SU);
190 return;
191 }
192
193 int IK = getInstKind(SU);
194
195
196 if (IK == IDOther)
197 Available[IDOther].push_back(SU);
198 else
199 Pending[IK].push_back(SU);
200
201}
202
203bool R600SchedStrategy::regBelongsToClass(Register Reg,
205 if (!Reg.isVirtual())
207 return MRI->getRegClass(Reg) == RC;
208}
209
210R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
212
213 if (TII->isTransOnly(*MI))
214 return AluTrans;
215
216 switch (MI->getOpcode()) {
217 case R600::PRED_X:
218 return AluPredX;
219 case R600::INTERP_PAIR_XY:
220 case R600::INTERP_PAIR_ZW:
221 case R600::INTERP_VEC_LOAD:
222 case R600::DOT_4:
223 return AluT_XYZW;
224 case R600::COPY:
225 if (MI->getOperand(1).isUndef()) {
226
227 return AluDiscarded;
228 }
229 break;
230 default:
231 break;
232 }
233
234
235
236
237 if(TII->isVector(*MI) ||
238 TII->isCubeOp(MI->getOpcode()) ||
239 TII->isReductionOp(MI->getOpcode()) ||
240 MI->getOpcode() == R600::GROUP_BARRIER) {
241 return AluT_XYZW;
242 }
243
244 if (TII->isLDSInstr(MI->getOpcode())) {
245 return AluT_X;
246 }
247
248
249 unsigned DestSubReg = MI->getOperand(0).getSubReg();
250 switch (DestSubReg) {
251 case R600::sub0:
252 return AluT_X;
253 case R600::sub1:
254 return AluT_Y;
255 case R600::sub2:
256 return AluT_Z;
257 case R600::sub3:
258 return AluT_W;
259 default:
260 break;
261 }
262
263
264 Register DestReg = MI->getOperand(0).getReg();
265 if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
266 regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
267 return AluT_X;
268 if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
269 return AluT_Y;
270 if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
271 return AluT_Z;
272 if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
273 return AluT_W;
274 if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
275 return AluT_XYZW;
276
277
278 if (TII->readsLDSSrcReg(*MI))
279 return AluT_XYZW;
280
281 return AluAny;
282}
283
284int R600SchedStrategy::getInstKind(SUnit* SU) {
286
287 if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
288 return IDFetch;
289
290 if (TII->isALUInstr(Opcode)) {
291 return IDAlu;
292 }
293
294 switch (Opcode) {
295 case R600::PRED_X:
296 case R600::COPY:
297 case R600::CONST_COPY:
298 case R600::INTERP_PAIR_XY:
299 case R600::INTERP_PAIR_ZW:
300 case R600::INTERP_VEC_LOAD:
301 case R600::DOT_4:
302 return IDAlu;
303 default:
304 return IDOther;
305 }
306}
307
308SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
309 if (Q.empty())
310 return nullptr;
311 for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
312 It != E; ++It) {
313 SUnit *SU = *It;
314 InstructionsGroupCandidate.push_back(SU->getInstr());
315 if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
316 (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) {
317 InstructionsGroupCandidate.pop_back();
318 Q.erase((It + 1).base());
319 return SU;
320 }
321 InstructionsGroupCandidate.pop_back();
322 }
323 return nullptr;
324}
325
326void R600SchedStrategy::LoadAlu() {
327 std::vector<SUnit *> &QSrc = Pending[IDAlu];
328 for (SUnit *SU : QSrc) {
329 AluKind AK = getAluKind(SU);
330 AvailableAlus[AK].push_back(SU);
331 }
332 QSrc.clear();
333}
334
335void R600SchedStrategy::PrepareNextSlot() {
337 assert(OccupiedSlotsMask && "Slot wasn't filled");
338 OccupiedSlotsMask = 0;
339
340
341 InstructionsGroupCandidate.clear();
342 LoadAlu();
343}
344
345void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
346 int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
347 if (DstIndex == -1) {
348 return;
349 }
350 Register DestReg = MI->getOperand(DstIndex).getReg();
351
352
353 for (const MachineOperand &MO : MI->all_uses())
354 if (MO.getReg() == DestReg)
355 return;
356
357 switch (Slot) {
358 case 0:
359 MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
360 break;
361 case 1:
362 MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
363 break;
364 case 2:
365 MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
366 break;
367 case 3:
368 MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
369 break;
370 }
371}
372
373SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
374 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
375 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
376 if (SlotedSU)
377 return SlotedSU;
378 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
379 if (UnslotedSU)
380 AssignSlot(UnslotedSU->getInstr(), Slot);
381 return UnslotedSU;
382}
383
384unsigned R600SchedStrategy::AvailablesAluCount() const {
385 return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
386 AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
387 AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
388 AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
389 AvailableAlus[AluPredX].size();
390}
391
392SUnit* R600SchedStrategy::pickAlu() {
393 while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
394 if (!OccupiedSlotsMask) {
395
396 if (!AvailableAlus[AluPredX].empty()) {
397 OccupiedSlotsMask |= 31;
398 return PopInst(AvailableAlus[AluPredX], false);
399 }
400
401 if (!AvailableAlus[AluDiscarded].empty()) {
402 OccupiedSlotsMask |= 31;
403 return PopInst(AvailableAlus[AluDiscarded], false);
404 }
405
406 if (!AvailableAlus[AluT_XYZW].empty()) {
407 OccupiedSlotsMask |= 15;
408 return PopInst(AvailableAlus[AluT_XYZW], false);
409 }
410 }
411 bool TransSlotOccupied = OccupiedSlotsMask & 16;
412 if (!TransSlotOccupied && VLIW5) {
413 if (!AvailableAlus[AluTrans].empty()) {
414 OccupiedSlotsMask |= 16;
415 return PopInst(AvailableAlus[AluTrans], false);
416 }
417 SUnit *SU = AttemptFillSlot(3, true);
418 if (SU) {
419 OccupiedSlotsMask |= 16;
420 return SU;
421 }
422 }
423 for (int Chan = 3; Chan > -1; --Chan) {
424 bool isOccupied = OccupiedSlotsMask & (1 << Chan);
425 if (!isOccupied) {
426 SUnit *SU = AttemptFillSlot(Chan, false);
427 if (SU) {
428 OccupiedSlotsMask |= (1 << Chan);
429 InstructionsGroupCandidate.push_back(SU->getInstr());
430 return SU;
431 }
432 }
433 }
434 PrepareNextSlot();
435 }
436 return nullptr;
437}
438
439SUnit* R600SchedStrategy::pickOther(int QID) {
440 SUnit *SU = nullptr;
441 std::vector<SUnit *> &AQ = Available[QID];
442
443 if (AQ.empty()) {
444 MoveUnits(Pending[QID], AQ);
445 }
446 if (!AQ.empty()) {
447 SU = AQ.back();
448 AQ.pop_back();
449 }
450 return SU;
451}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Promote Memory to Register
Provides R600 specific target descriptions.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount)
Definition R600MachineScheduler.cpp:47
static bool isPhysicalRegCopy(MachineInstr *MI)
Definition R600MachineScheduler.cpp:175
R600 Machine Scheduler interface.
AMDGPU R600 specific subclass of TargetSubtarget.
Representation of each machine instruction.
mop_iterator operands_begin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mop_iterator operands_end()
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition R600MachineScheduler.cpp:22
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
Definition R600MachineScheduler.cpp:52
void releaseBottomNode(SUnit *SU) override
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Definition R600MachineScheduler.cpp:186
void releaseTopNode(SUnit *SU) override
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
Definition R600MachineScheduler.cpp:182
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition R600MachineScheduler.cpp:135
Wrapper class representing virtual and physical registers.
Scheduling unit. This is a node in the scheduling DAG.
bool isScheduled
True once scheduled.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.