LLVM: lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp Source File
//===-- GCNRewritePartialRegUses.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass looks for virtual registers that are only ever accessed through
/// subregister operands and rewrites each of them to a new virtual register
/// of the minimal register class that still supports all of the used
/// (suitably right-shifted) subregister indices. This shrinks partially used
/// wide register tuples and reduces register pressure. Live intervals are
/// updated when the LiveIntervals analysis is available.
//
//===----------------------------------------------------------------------===//

#include "GCNRewritePartialRegUses.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
using namespace llvm;

#define DEBUG_TYPE "rewrite-partial-reg-uses"
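// To exercise this pass in isolation on MIR input (assuming a standard llc
// build; the pass argument below is the DEBUG_TYPE string registered with
// INITIALIZE_PASS at the bottom of this file):
//   llc -mtriple=amdgcn -run-pass=rewrite-partial-reg-uses in.mir -o out.mir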
namespace {
class GCNRewritePartialRegUsesImpl {
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  /// Rewrite partially used register Reg by shifting all its subregisters to
  /// the right and replacing the original register with a register of
  /// minimal size. Return true if the change has been made.
  bool rewriteReg(Register Reg) const;

  /// Map OldSubReg -> NewSubReg. Used as an in/out container.
  using SubRegMap = SmallDenseMap<unsigned, unsigned>;

  /// Given register class RC and the set of used subregs as keys in the
  /// SubRegs map, return a new register class and the new (right-shifted)
  /// subreg indices as values in the map, such that the resulting class
  /// contains registers of minimal size.
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;

  /// Given register class RC and [OldSubReg -> NewSubReg] pairs in SubRegs,
  /// try to find a register class such that every OldSubReg, shifted right by
  /// RShift bits, is a valid subreg index of that class. If CoverSubregIdx is
  /// non-zero it denotes the used subreg that covers all the others; it
  /// becomes the whole register. On success the shifted indices are stored
  /// back into SubRegs as values.
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;

  /// Update live intervals after rewriting OldReg to NewReg with the
  /// OldSubReg -> NewSubReg mapping described by SubRegs.
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;

  /// Return the subreg index that matches SubReg shifted right by RShift
  /// bits, or 0 if no such index exists.
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;

  /// Return the subreg index with the given bit Offset and Size, or 0 if
  /// there is no such index. The result is cached in the SubRegs map below.
  unsigned getSubReg(unsigned Offset, unsigned Size) const;

  /// Cache for getSubReg: {Offset, Size} -> subreg index.
  mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;

  /// Return the bit mask of register classes that can be projected into RC
  /// through SubRegIdx. The result is cached in SuperRegMasks below.
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;

  /// Cache for getSuperRegClassMask: {RC, SubRegIdx} -> class bit mask.
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;

  /// Return the set of allocatable register classes whose registers are
  /// aligned to AlignNumBits. The result is cached in
  /// AllocatableAndAlignedRegClassMasks below.
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;

  /// Cache for getAllocatableAndAlignedRegClassMask:
  /// AlignNumBits -> class bit vector.
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;

public:
  GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
  bool run(MachineFunction &MF);
};

class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
  static char ID;
  GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
                                                 unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    // Linear scan: TRI exposes no direct {Offset, Size} -> index lookup, so
    // enumerate all subreg indices once and memoize the answer.
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}

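// Illustration (schematic, assuming AMDGPU's 32-bit lanes): shifting sub2_sub3
// (bit offset 64, size 64) right by 64 bits looks up the index with offset 0
// and size 64, i.e. sub0_sub1. If no subreg index exists at the shifted
// offset, 0 is returned and the caller gives up on this shift amount.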
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
                                                   unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}

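// The mask returned below has a bit set for each register class whose
// SubRegIdx subregister can hold a register of class RC;
// getRegClassWithShiftedSubregs intersects these masks over all used subregs
// to find the classes that support every shifted index simultaneously.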
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
  const auto [I, Inserted] =
      SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}

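// On GCN, wide register tuples may be required to start at an aligned
// register (for example, even-aligned VGPR tuples on some subtargets);
// isRegClassAligned filters candidate classes accordingly so the rewrite
// never relaxes an alignment requirement of the original class.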
const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
        BV.set(ClassID);
    }
  }
  return I->second;
}

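// The candidate set below starts as "all allocatable classes with RC's
// alignment". Each used subreg then narrows it to the classes that support
// its shifted index (or, for the covering subreg, to the classes that can
// hold its subregister class as a whole register). The smallest surviving
// class wins.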
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
    SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, NewSubReg] : SubRegs) {
    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':');

    auto *SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
    if (!SubRegRC) {
      LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
      return nullptr;
    }
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg will become a whole register, so its register
      // class must be allocatable.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);

    // Don't try to early-exit here: checking whether ClassMask still has any
    // set bits isn't that cheap, and the check is expected to pass in most
    // cases.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // ClassMask now contains exactly the register classes that are allocatable,
  // aligned, and have all the shifted subregs with the required subregister
  // classes. Select the one whose registers have minimal size.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
  }
#ifndef NDEBUG
  if (MinRC) {
    assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
    for (auto [OldSubReg, NewSubReg] : SubRegs)
      // Check that all registers in MinRC support the NewSubReg subregister.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, NewSubReg));
  }
#endif
  // RShift may be zero: in that case we are merely looking for a smaller
  // register class for the same subreg layout.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}

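// Worked example (schematic): suppose a VReg_1024 register is only used as
// %0.sub4_sub5 and %0.sub4. The lowest used bit offset is 128 and the highest
// end is 192, and sub4_sub5 spans exactly [128, 192), so it is the covering
// subreg. Shifting right by 128 bits turns sub4_sub5 into the whole register
// and sub4 into sub0, and the smallest matching class (VReg_64 here) replaces
// VReg_1024.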
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
                                            SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  // If a covering subreg is found, shift everything so that it lands in the
  // rightmost position.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift
  // it, together with all other subregs, to the rightmost position that still
  // respects that alignment.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);

  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
}

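// Live interval update strategy: each subrange of the old interval is matched
// to a used subreg by comparing lane masks. Matched subranges are re-created
// under the shifted lane mask, and the subrange of the covering subreg (if
// any) becomes the new main range. When subranges and used subregs do not
// match one-to-one, the new interval is simply recomputed from scratch.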
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
    Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // A subrange's lane mask may not correspond to any single used subreg:
      // subregister liveness tracking can split or merge lanes so that a
      // subrange covers several used subregs at once. Rather than trying to
      // map such subranges, drop both intervals and recompute the new
      // register's interval from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg: its subrange becomes the main range.
      NewLI.assign(SR, Allocator);

    SubRegs.erase(I);
  }
  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);

  LIS->removeInterval(OldReg);
}

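// For example (schematic MIR), a VReg_1024 register used only through sub4,
// sub5 and sub4_sub5:
//   undef %0.sub4:VReg_1024 = ...
//   %0.sub5:VReg_1024 = ...
//   use %0.sub4_sub5
// is rewritten to a VReg_64 register:
//   undef %1.sub0:VReg_64 = ...
//   %1.sub1:VReg_64 = ...
//   use %1
// A def of the covering subreg becomes a def of the whole register, so its
// undef flag (if any) is cleared.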
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
  // Collect the used subregs; bail out as soon as the whole register is used
  // anywhere.
  SubRegMap SubRegs;
  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
    if (MO.getSubReg() == AMDGPU::NoSubRegister)
      return false;
    SubRegs.try_emplace(MO.getSubReg());
  }

  if (SubRegs.empty())
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC) {
    LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
    return false;
  }

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info may refer to the whole register; leave such operands
    // unchanged for now.
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned NewSubReg = SubRegs[MO.getSubReg()];
    MO.setSubReg(NewSubReg);
    if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}

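// Note: rewriteReg creates fresh virtual registers, but the loop bound E below
// is captured up front, so newly created registers are not revisited; they are
// already of minimal size by construction.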
bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}

bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervalsWrapperPass *LISWrapper =
      getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  GCNRewritePartialRegUsesImpl Impl(LIS);
  return Impl.run(MF);
}

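// In the new pass manager the pass only queries LiveIntervals if the analysis
// has already been computed (getCachedResult), mirroring the legacy pass's
// getAnalysisIfAvailable: live intervals are updated when present but never
// required.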
PreservedAnalyses
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  LiveIntervals *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

char GCNRewritePartialRegUsesLegacy::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)