LLVM: lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
33
34#define DEBUG_TYPE "amdgpu-regbanklegalize"
35
36using namespace llvm;
37using namespace AMDGPU;
39
40namespace {
41
42
43template
45m_GAMDGPUReadAnyLane(const SrcTy &Src) {
47}
48
50public:
51 static char ID;
52
53public:
55
57
/// Returns this pass's display name as a fixed string literal.
58 StringRef getPassName() const override {
59 return "AMDGPU Register Bank Legalize";
60 }
61
62 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 }
68
69
70
73 }
74};
75
76}
77
79 "AMDGPU Register Bank Legalize", false, false)
84 "AMDGPU Register Bank Legalize", false, false)
85
86char AMDGPURegBankLegalize::ID = 0;
87
89
91 return new AMDGPURegBankLegalize();
92}
93
96 static std::mutex GlobalMutex;
98 CacheForRuleSet;
99 std::lock_guardstd::mutex Lock(GlobalMutex);
100 auto [It, Inserted] = CacheForRuleSet.try_emplace(ST.getGeneration());
101 if (Inserted)
102 It->second = std::make_unique(ST, MRI);
103 else
104 It->second->refreshRefs(ST, MRI);
105 return *It->second;
106}
107
115
120
121public:
124 : B(B), MRI(*B.getMRI()), TRI(TRI),
125 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
126 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
127 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {};
128
130 std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode);
134
138};
139
141 const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
142 if (RB && RB->getID() == AMDGPU::VCCRegBankID)
143 return true;
144
146 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1);
147}
148
149std::pair<MachineInstr *, Register>
156
157std::pair<GUnmerge *, int>
159 MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
160 if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
161 return {nullptr, -1};
162
165 return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
166
167 return {nullptr, -1};
168}
169
171
173 if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
174 return RALSrc;
175
176
177
178
181 return RALSrc;
182 }
183
184
185
186
189 return RALSrc;
190 }
191
192
193
194
195
198 unsigned NumElts = Merge->getNumSources();
200 if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
201 return {};
202
203
204 for (unsigned i = 1; i < NumElts; ++i) {
206 if (UnmergeI != Unmerge || (unsigned)IdxI != i)
207 return {};
208 }
209 return Unmerge->getSourceReg();
210 }
211
212
213
214
216 if (!UnMerge)
217 return {};
218
219 int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
221 if ( || UnMerge->getNumDefs() != Merge->getNumSources())
222 return {};
223
225 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
226 return {};
227
228 auto [RALEl, RALElSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
229 if (RALEl)
230 return RALElSrc;
231
232 return {};
233}
234
237 if (Dst.isVirtual())
238 MRI.replaceRegWith(Dst, Src);
239 else
240 B.buildCopy(Dst, Src);
241}
242
245 Register Dst = Copy.getOperand(0).getReg();
246 Register Src = Copy.getOperand(1).getReg();
247
248
249 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
250 : !TRI.isVGPR(MRI, Dst))
251 return false;
252
253
254 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
255 return false;
256
259 if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
261
263 if (!RALSrc)
264 return false;
265
266 B.setInstr(Copy);
267 if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
268
269
270
271
273 } else {
274
275
276
277
278
279
280 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
282 }
283
285 return true;
286}
287
290 return;
291
292 Register Dst = MI.getOperand(0).getReg();
293 Register Src = MI.getOperand(1).getReg();
294
295 if (!Dst.isVirtual() || !Src.isVirtual())
296 return;
297
298
299
300
301
302
303
304
305 if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
306 auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC);
307 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
308 "sgpr S1 must be result of G_TRUNC of sgpr S32");
309
310 B.setInstr(MI);
311
312 auto One = B.buildConstant({SgprRB, S32}, 1);
313 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
314 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
316 }
317}
318
320
321
322
323
324 Register Dst = MI.getOperand(0).getReg();
325 Register Src = MI.getOperand(1).getReg();
326 if (MRI.getType(Src) != S1)
327 return;
328
329 auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC);
330 if (!Trunc)
331 return;
332
333 LLT DstTy = MRI.getType(Dst);
334 LLT TruncSrcTy = MRI.getType(TruncSrc);
335
336 if (DstTy == TruncSrcTy) {
337 MRI.replaceRegWith(Dst, TruncSrc);
339 return;
340 }
341
342 B.setInstr(MI);
343
344 if (DstTy == S32 && TruncSrcTy == S64) {
345 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
346 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
348 return;
349 }
350
351 if (DstTy == S64 && TruncSrcTy == S32) {
352 B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
353 {TruncSrc, B.buildUndef({SgprRB, S32})});
355 return;
356 }
357
358 if (DstTy == S32 && TruncSrcTy == S16) {
359 B.buildAnyExt(Dst, TruncSrc);
361 return;
362 }
363
364 if (DstTy == S16 && TruncSrcTy == S32) {
365 B.buildTrunc(Dst, TruncSrc);
367 return;
368 }
369
371}
372
373
376 for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
379 continue;
380
382 if (RB && RB->getID() == AMDGPU::SGPRRegBankID) {
383 LLVM_DEBUG(dbgs() << "Warning: detected sgpr S1 register in: ";
385 return Reg;
386 }
387 }
388
389 return {};
390}
391
392bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
394 return false;
395
396
397 const TargetPassConfig &TPC = getAnalysis();
398 GISelCSEAnalysisWrapper &Wrapper =
399 getAnalysis().getCSEWrapper();
401 GISelObserverWrapper Observer;
403
404 CSEMIRBuilder B(MF);
405 B.setCSEInfo(&CSEInfo);
406 B.setChangeObserver(Observer);
407
408 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
409 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
410
411 const GCNSubtarget &ST = MF.getSubtarget();
413 const RegisterBankInfo &RBI = *ST.getRegBankInfo();
415 getAnalysis().getUniformityInfo();
416
417
418 const RegBankLegalizeRules &RBLRules = getRules(ST, MRI);
419
420
421 RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules);
422
424
425 for (MachineBasicBlock &MBB : MF) {
426 for (MachineInstr &MI : MBB) {
428 }
429 }
430
431 for (MachineInstr *MI : AllInst) {
432 if (->isPreISelOpcode())
433 continue;
434
435 unsigned Opc = MI->getOpcode();
436
437 if (Opc == AMDGPU::G_PHI) {
438 if (!RBLHelper.applyMappingPHI(*MI))
439 return false;
440 continue;
441 }
442
443
444
445 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
446 Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
447 RBLHelper.applyMappingTrivial(*MI);
448 continue;
449 }
450
451
452 if (Opc == G_FREEZE &&
454 RBLHelper.applyMappingTrivial(*MI);
455 continue;
456 }
457
458 if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
459 Opc == AMDGPU::G_IMPLICIT_DEF)) {
460 Register Dst = MI->getOperand(0).getReg();
461
463 assert(MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
464 continue;
465 }
466
467
468 }
469
470 if (!RBLHelper.findRuleAndApplyMapping(*MI))
471 return false;
472 }
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497 AMDGPURegBankLegalizeCombiner Combiner(B, *ST.getRegisterInfo(), RBI);
498
499 for (MachineBasicBlock &MBB : MF) {
501 if (MI.getOpcode() == AMDGPU::COPY) {
502 Combiner.tryCombineCopy(MI);
503 continue;
504 }
505 if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
506 Combiner.tryCombineS1AnyExt(MI);
507 continue;
508 }
509 }
510 }
511
513 "Registers with sgpr reg bank and S1 LLT are not legal after "
514 "AMDGPURegBankLegalize. Should lower to sgpr S32");
515
516 return true;
517}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
Definition AMDGPURegBankLegalize.cpp:374
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
Definition AMDGPURegBankLegalize.cpp:94
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
Contains matchers for matching SSA Machine Instructions.
Machine IR instance of the generic uniformity analysis.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
std::pair< GUnmerge *, int > tryMatchRALFromUnmerge(Register Src)
Definition AMDGPURegBankLegalize.cpp:158
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
Definition AMDGPURegBankLegalize.cpp:235
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
Definition AMDGPURegBankLegalize.cpp:122
bool isLaneMask(Register Reg)
Definition AMDGPURegBankLegalize.cpp:140
void tryCombineS1AnyExt(MachineInstr &MI)
Definition AMDGPURegBankLegalize.cpp:319
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
Definition AMDGPURegBankLegalize.cpp:150
Register getReadAnyLaneSrc(Register Src)
Definition AMDGPURegBankLegalize.cpp:170
void tryCombineCopy(MachineInstr &MI)
Definition AMDGPURegBankLegalize.cpp:288
bool tryEliminateReadAnyLane(MachineInstr &Copy)
Definition AMDGPURegBankLegalize.cpp:243
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Legacy analysis pass which computes a MachineUniformityInfo.
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
FunctionPass * createAMDGPURegBankLegalizePass()
Definition AMDGPURegBankLegalize.cpp:90
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID
Definition AMDGPURegBankLegalize.cpp:88