LLVM: lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
25#include "llvm/IR/IntrinsicsAMDGPU.h"
26
27#define DEBUG_TYPE "amdgpu-regbanklegalize"
28
29using namespace llvm;
30using namespace AMDGPU;
31
// Constructor member-init list: caches the function, subtarget, MRI,
// uniformity info, rule table and the three AMDGPU register banks
// (SGPR/VGPR/VCC) used throughout the helper.
// NOTE(review): scraped listing — the constructor signature line is missing
// and original line numbers are fused into each line; restore from upstream.
35 : MF(B.getMF()), ST(MF.getSubtarget<GCNSubtarget>()), B(B),
36 MRI(*B.getMRI()), MUI(MUI), RBI(RBI), MORE(MF, nullptr),
37 RBLRules(RBLRules), IsWave32(ST.isWave32()),
38 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
39 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
40 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
41
// findRuleAndApplyMapping: looks up the legalization rule set for MI's
// opcode, finds a matching mapping, applies it to dst/src operands and then
// lowers MI. Returns false (after emitting a remark, judging by the message
// strings) when no rule/mapping exists or any step fails.
// NOTE(review): scraped listing — several lines (rule lookup, remark calls,
// applyMappingDst/Src calls) are missing; restore from upstream before editing.
44 if (!RuleSet) {
46 "No AMDGPU RegBankLegalize rules defined for opcode",
48 return false;
49 }
50
52 if (!Mapping) {
54 "AMDGPU RegBankLegalize: none of the rules defined with "
55 "'Any' for MI's opcode matched MI",
57 return false;
58 }
59
61 unsigned OpIdx = 0;
// Dst operands are materialized after MI, src operands before it.
63 B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
65 return false;
66 }
68 B.setInstr(MI);
70 return false;
71 }
72
73 if (!lower(MI, *Mapping, WaterfallSgprs))
74 return false;
75
76 return true;
77}
78
// executeInWaterfallLoop: wraps the instructions in Range in a waterfall
// loop so that operands listed in the SGPR-operand set, which are divergent
// VGPR values, can be fed one lane at a time through readfirstlane'd SGPR
// copies. Per iteration it compares each candidate operand against the
// current lane's value (32/64-bit parts), ANDs the comparisons into a single
// VCC condition, ballots it into a lane mask, and uses S_AND_SAVEEXEC /
// S_XOR_..._term to peel off the handled lanes until exec is empty.
// NOTE(review): scraped listing — the parameter list, the block-creation code
// (LoopBB/BodyBB/RestoreExecBB/RemainderBB) and many declarations are missing
// (lines reduced to bare numbers); restore from upstream before editing.
79bool RegBankLegalizeHelper::executeInWaterfallLoop(
82
83
85
88
// Select wave-size-specific exec-manipulation opcodes and the exec register.
91 unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
92 if (IsWave32) {
93 MovExecOpc = AMDGPU::S_MOV_B32;
94 MovExecTermOpc = AMDGPU::S_MOV_B32_term;
95 XorTermOpc = AMDGPU::S_XOR_B32_term;
96 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
97 ExecReg = AMDGPU::EXEC_LO;
98 } else {
99 MovExecOpc = AMDGPU::S_MOV_B64;
100 MovExecTermOpc = AMDGPU::S_MOV_B64_term;
101 XorTermOpc = AMDGPU::S_XOR_B64_term;
102 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
103 ExecReg = AMDGPU::EXEC;
104 }
105
106#ifndef NDEBUG
107 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
108#endif
109
111 Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
112 Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
113
114
115 B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
116
117 Register SavedExec = MRI.createVirtualRegister(WaveRC);
118
119
120
121
132
136
137
140
141 MBB.addSuccessor(LoopBB);
143
144 B.setInsertPt(*LoopBB, LoopBB->end())
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
192
193
195 auto NewEnd = BodyBB->end();
196 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
197
198 B.setMBB(*LoopBB);
200
// Skip operands that were not flagged for waterfalling.
204 if (!SGPROperandRegs.count(OldReg))
205 continue;
206
207
208
// Reuse the per-lane SGPR value if this register was already waterfalled.
209 auto OldVal = WaterfalledRegMap.find(OldReg);
210 if (OldVal != WaterfalledRegMap.end()) {
211 Op.setReg(OldVal->second);
212 continue;
213 }
214
216 LLT OpTy = MRI.getType(OpReg);
217
218
219 assert(MRI.getRegBank(OpReg) == VgprRB);
220 Register CurrentLaneReg = MRI.createVirtualRegister({SgprRB, OpTy});
222
223
// Compare in 64-bit chunks when the size allows, otherwise 32-bit chunks.
225 unsigned PartSize = (OpSize % 64 == 0) ? 64 : 32;
227 unsigned NumParts = OpSize / PartSize;
230
231 if (NumParts == 1) {
233 CurrentLaneParts.push_back(CurrentLaneReg);
234 } else {
235 auto UnmergeOp = B.buildUnmerge({VgprRB, PartTy}, OpReg);
236 auto UnmergeCurrLane = B.buildUnmerge({SgprRB, PartTy}, CurrentLaneReg);
237 for (unsigned i = 0; i < NumParts; ++i) {
238 OpParts.push_back(UnmergeOp.getReg(i));
239 CurrentLaneParts.push_back(UnmergeCurrLane.getReg(i));
240 }
241 }
242
// AND together the per-part "this lane matches" comparisons.
243 for (unsigned i = 0; i < NumParts; ++i) {
244 Register CmpReg = MRI.createVirtualRegister(VccRB_S1);
245 B.buildICmp(CmpInst::ICMP_EQ, CmpReg, CurrentLaneParts[i], OpParts[i]);
246
247 if (!CondReg)
248 CondReg = CmpReg;
249 else
250 CondReg = B.buildAnd(VccRB_S1, CondReg, CmpReg).getReg(0);
251 }
252
253 Op.setReg(CurrentLaneReg);
254
255
256 WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
257 }
258 }
259
260
262 MRI.createVirtualRegister({WaveRC, LLT::scalar(IsWave32 ? 32 : 64)});
263 B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
264
265
266 B.buildInstr(AndSaveExecOpc)
267 .addDef(SavedExec)
269 MRI.setSimpleHint(SavedExec, CondRegLM);
270
271 B.setInsertPt(*BodyBB, BodyBB->end());
272
273
274 B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
275
276
277
278
279
280 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
281
282
284 B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
285
286
287 B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
288 B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
289
290
291
// Continue emitting in the remainder block after the loop.
292 B.setInsertPt(*RemainderBB, RemainderBB->begin());
293
294 return true;
295}
296
// splitLoad: replaces one load with several smaller loads, one per type in
// LLTBreakdown, at increasing byte offsets from the base pointer, then
// rebuilds the original destination by merging the parts (optionally
// re-chunked to MergeTy first). Narrowed MMOs are derived from the original.
// NOTE(review): scraped listing — the parameter list and several declarations
// (Base, OffsetTy, LoadPartRegs, MergeTyParts) are missing; restore from
// upstream before editing.
297bool RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
299 MachineFunction &MF = B.getMF();
300 assert(MI.getNumMemOperands() == 1);
301 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
302 Register Dst = MI.getOperand(0).getReg();
303 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
305 LLT PtrTy = MRI.getType(Base);
306 const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
309
310 unsigned ByteOffset = 0;
311 for (LLT PartTy : LLTBreakdown) {
// First part loads straight from the base; later parts add a byte offset.
313 if (ByteOffset == 0) {
314 BasePlusOffset = Base;
315 } else {
316 auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
317 BasePlusOffset =
318 B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
319 }
320 auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
321 auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
322 LoadPartRegs.push_back(LoadPart.getReg(0));
324 }
325
326 if (!MergeTy.isValid()) {
// Parts already match the destination layout; merge them directly.
327
328 B.buildMergeLikeInstr(Dst, LoadPartRegs);
329 } else {
// Unmerge any part wider than MergeTy into MergeTy pieces, then merge all.
330
331
334 if (MRI.getType(Reg) == MergeTy) {
336 } else {
337 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
338 for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
339 MergeTyParts.push_back(Unmerge.getReg(i));
340 }
341 }
342 B.buildMergeLikeInstr(Dst, MergeTyParts);
343 }
344 MI.eraseFromParent();
345 return true;
346}
347
// widenLoad: loads WideTy instead of the original (narrower) type, then
// either truncates to Dst or unmerges into MergeTy pieces and merges only
// the pieces that cover Dst. The MMO is widened from the original at offset 0.
// NOTE(review): scraped listing — the Base declaration, the scalar/vector
// branch condition and the MergeTyParts/NumElts declarations are missing;
// restore from upstream before editing.
348bool RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
349 LLT MergeTy) {
350 MachineFunction &MF = B.getMF();
351 assert(MI.getNumMemOperands() == 1);
352 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
353 Register Dst = MI.getOperand(0).getReg();
354 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
356
357 MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
358 auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);
359
361 B.buildTrunc(Dst, WideLoad);
362 } else {
364 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
365
366 LLT DstTy = MRI.getType(Dst);
// Keep only the leading pieces that make up the original destination.
368 for (unsigned i = 0; i < NumElts; ++i) {
369 MergeTyParts.push_back(Unmerge.getReg(i));
370 }
371 B.buildMergeLikeInstr(Dst, MergeTyParts);
372 }
373 MI.eraseFromParent();
374 return true;
375}
376
// widenMMOToS32: rewrites a sub-32-bit (extending) load as a full 32-bit
// load. Plain G_LOAD just loads S32 into Dst; G_ZEXTLOAD masks the loaded
// value down to the original memory width; G_SEXTLOAD sign-extends in-reg
// from the original memory width.
// NOTE(review): scraped listing — Dst/Ptr/MemSize/Mask declarations are
// missing; restore from upstream before editing.
377bool RegBankLegalizeHelper::widenMMOToS32(GAnyLoad &MI) const {
380 MachineMemOperand &MMO = MI.getMMO();
382
383 MachineMemOperand *WideMMO = B.getMF().getMachineMemOperand(&MMO, 0, S32);
384
385 if (MI.getOpcode() == G_LOAD) {
386 B.buildLoad(Dst, Ptr, *WideMMO);
387 } else {
388 auto Load = B.buildLoad(SgprRB_S32, Ptr, *WideMMO);
389
390 if (MI.getOpcode() == G_ZEXTLOAD) {
// Zero-extend by masking off the bits above the original memory size.
392 auto MaskCst = B.buildConstant(SgprRB_S32, Mask);
393 B.buildAnd(Dst, Load, MaskCst);
394 } else {
395 assert(MI.getOpcode() == G_SEXTLOAD);
396 B.buildSExtInReg(Dst, Load, MemSize);
397 }
398 }
399
400 MI.eraseFromParent();
401 return true;
402}
403
// lowerVccExtToSel: lowers G_SEXT/G_ZEXT/G_ANYEXT of a VCC (S1) value to a
// select between constants: -1 for sext-true, 1 for zext/anyext-true, 0 for
// false. S64 results are built as two S32 halves and merged. Unsupported
// opcode/type produces a remark and returns false.
// NOTE(review): scraped listing — the G_SEXT high-half assignment and the
// remark-call heads are missing; restore from upstream before editing.
404bool RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
405 Register Dst = MI.getOperand(0).getReg();
406 LLT Ty = MRI.getType(Dst);
407 Register Src = MI.getOperand(1).getReg();
408 unsigned Opc = MI.getOpcode();
409 int TrueExtCst = Opc == G_SEXT ? -1 : 1;
410 if (Ty == S32 || Ty == S16) {
411 auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
412 auto False = B.buildConstant({VgprRB, Ty}, 0);
413 B.buildSelect(Dst, Src, True, False);
414 } else if (Ty == S64) {
415 auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
416 auto False = B.buildConstant({VgprRB_S32}, 0);
417 auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
418 MachineInstrBuilder Hi;
419 switch (Opc) {
420 case G_SEXT:
422 break;
423 case G_ZEXT:
424 Hi = False;
425 break;
426 case G_ANYEXT:
427 Hi = B.buildUndef({VgprRB_S32});
428 break;
429 default:
431 MF, MORE, "amdgpu-regbanklegalize",
432 "AMDGPU RegBankLegalize: lowerVccExtToSel, Opcode not supported", MI);
433 return false;
434 }
435
436 B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
437 } else {
439 MF, MORE, "amdgpu-regbanklegalize",
440 "AMDGPU RegBankLegalize: lowerVccExtToSel, Type not supported", MI);
441 return false;
442 }
443
444 MI.eraseFromParent();
445 return true;
446}
447
448std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
449 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
450 auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
451 auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
452 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
453 return {Lo.getReg(0), Hi.getReg(0)};
454}
455
456std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
457 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
458 auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
459 auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
460 return {Lo.getReg(0), Hi.getReg(0)};
461}
462
463std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
464 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
465 auto Lo = PackedS32;
466 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
467 return {Lo.getReg(0), Hi.getReg(0)};
468}
469
470std::pair<Register, Register>
471RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) {
472 auto [Lo32, Hi32] = unpackAExt(Reg);
473 return {B.buildTrunc(SgprRB_S16, Lo32).getReg(0),
474 B.buildTrunc(SgprRB_S16, Hi32).getReg(0)};
475}
476
// lowerUnpackBitShift: lowers a V2S16 shift by unpacking value and amount
// into two S32 halves (aext for SHL, zext for LSHR, sext for ASHR), shifting
// each half at 32 bits, and repacking with build_vector_trunc.
// NOTE(review): scraped listing — the `Register Lo, Hi;` declaration and the
// remark-call head in the default case are missing; restore from upstream.
477bool RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
479 switch (MI.getOpcode()) {
480 case AMDGPU::G_SHL: {
481 auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
482 auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
483 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
484 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
485 break;
486 }
487 case AMDGPU::G_LSHR: {
488 auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
489 auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
490 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
491 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
492 break;
493 }
494 case AMDGPU::G_ASHR: {
495 auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
496 auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
497 Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
498 Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
499 break;
500 }
501 default:
503 MF, MORE, "amdgpu-regbanklegalize",
504 "AMDGPU RegBankLegalize: lowerUnpackBitShift, case not implemented",
506 return false;
507 }
508 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
509 MI.eraseFromParent();
510 return true;
511}
512
// lowerUnpackMinMax: lowers a V2S16 min/max by unpacking both operands into
// S32 halves (sext for signed smin/smax, zext for unsigned umin/umax),
// applying the same opcode per half at 32 bits, and repacking with
// build_vector_trunc.
// NOTE(review): scraped listing — the `Register Lo, Hi;` declaration and the
// remark-call head are missing; restore from upstream before editing.
513bool RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
515 switch (MI.getOpcode()) {
516 case AMDGPU::G_SMIN:
517 case AMDGPU::G_SMAX: {
// Signed: compare on sign-extended halves.
518
519 auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
520 auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
521 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
522 .getReg(0);
523 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
524 .getReg(0);
525 break;
526 }
527 case AMDGPU::G_UMIN:
528 case AMDGPU::G_UMAX: {
// Unsigned: compare on zero-extended halves.
529
530 auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
531 auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
532 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
533 .getReg(0);
534 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
535 .getReg(0);
536 break;
537 }
538 default:
540 MF, MORE, "amdgpu-regbanklegalize",
541 "AMDGPU RegBankLegalize: lowerUnpackMinMax, case not implemented", MI);
542 return false;
543 }
544 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
545 MI.eraseFromParent();
546 return true;
547}
548
549bool RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
550 auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
551 auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
552 auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
553 auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
554 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
555 {ResLo.getReg(0), ResHi.getReg(0)});
556 MI.eraseFromParent();
557 return true;
558}
559
// isSignedBFE: true for the signed bitfield-extract forms — the
// amdgcn.sbfe intrinsic or the G_SBFX generic opcode.
// NOTE(review): scraped listing — the function signature and the
// dyn_cast<GIntrinsic> guard line are missing; restore from upstream.
562 return (GI->is(Intrinsic::amdgcn_sbfe));
563
564 return MI.getOpcode() == AMDGPU::G_SBFX;
565}
566
// lowerV_BFE: lowers a 64-bit divergent bitfield extract. First shifts the
// source right by the LSB (arithmetic when signed, logical otherwise). For a
// non-constant width it extracts via shl/shr by (64 - width). For a constant
// width it unmerges into S32 halves and uses G_SBFX/G_UBFX on the relevant
// half, synthesizing the other half (sign-replicate or the untouched low).
// NOTE(review): scraped listing — Signed/FirstOpnd/ConstWidth declarations
// and two branch-condition lines are missing; restore from upstream.
567bool RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
568 Register Dst = MI.getOperand(0).getReg();
572
573
574 Register Src = MI.getOperand(FirstOpnd).getReg();
575 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
576 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
577
578
579
580
581 unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
582 auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
583
585
586
587
588
589 if (!ConstWidth) {
// Variable width: shift left then right by (64 - width) to isolate the field.
590 auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
591 auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
592 B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
593 MI.eraseFromParent();
594 return true;
595 }
596
597 uint64_t WidthImm = ConstWidth->Value.getZExtValue();
598 auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
599 Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
600 Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
601 auto Zero = B.buildConstant({VgprRB, S32}, 0);
602 unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
603
604 if (WidthImm <= 32) {
// Field fits in the low half: BFX the low 32 bits, derive the high half.
605
606 auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
607 MachineInstrBuilder Hi;
609
610 Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
611 } else {
612
614 }
615 B.buildMergeLikeInstr(Dst, {Lo, Hi});
616 } else {
// Field spans both halves: keep the low word, BFX the high word.
617 auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
618
619 auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
620 B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
621 }
622
623 MI.eraseFromParent();
624 return true;
625}
626
// lowerS_BFE: lowers a uniform bitfield extract to the scalar S_BFE_{I,U}32/64
// machine instruction. The S_BFE src1 encoding packs offset in bits [5:0] and
// width in bits [22:16], built here as (LSBit & mask) | (Width << 16). The
// built instruction is constrained to register classes; failure emits a
// remark and returns false.
// NOTE(review): scraped listing — Signed/FirstOpnd/Mask declarations and the
// constrainSelectedInstRegOperands call head are missing; restore upstream.
627bool RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
628 Register DstReg = MI.getOperand(0).getReg();
629 LLT Ty = MRI.getType(DstReg);
632 Register Src = MI.getOperand(FirstOpnd).getReg();
633 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
634 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
635
636
637
638
639
641 auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
642 auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
643 auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
644 unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
645 unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
646 unsigned Opc = Ty == S32 ? Opc32 : Opc64;
647
648
649
650 auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
651 {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
653 *ST.getRegisterInfo(), RBI)) {
655 MF, MORE, "amdgpu-regbanklegalize",
656 "AMDGPU RegBankLegalize: lowerS_BFE, failed to constrain BFE", MI);
657 return false;
658 }
659
660 B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
661 MI.eraseFromParent();
662 return true;
663}
664
665bool RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
666 Register Dst = MI.getOperand(0).getReg();
667 LLT DstTy = MRI.getType(Dst);
668 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
669 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
670 auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
671 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
672 unsigned Opc = MI.getOpcode();
673 auto Flags = MI.getFlags();
674 auto Lo =
675 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
676 auto Hi =
677 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
678 B.buildMergeLikeInstr(Dst, {Lo, Hi});
679 MI.eraseFromParent();
680 return true;
681}
682
683bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
684 Register Dst = MI.getOperand(0).getReg();
685 assert(MRI.getType(Dst) == V2S16);
686 unsigned Opc = MI.getOpcode();
687 auto Flags = MI.getFlags();
688
689 if (MI.getNumOperands() == 2) {
690 auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
691 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
692 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
693 B.buildMergeLikeInstr(Dst, {Lo, Hi});
694 MI.eraseFromParent();
695 return true;
696 }
697
698 assert(MI.getNumOperands() == 3);
699 auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
700 auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
701 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
702 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
703 B.buildMergeLikeInstr(Dst, {Lo, Hi});
704 MI.eraseFromParent();
705 return true;
706}
707
// lowerSplitTo32Select: lowers a select on a 64-bit-sized result by
// unmerging both value operands into two VGPR halves, selecting each half
// with the same condition and flags, and merging the two selected halves.
// NOTE(review): scraped listing — the assert's continuation line and the
// Cond register declaration are missing; restore from upstream.
708bool RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
709 Register Dst = MI.getOperand(0).getReg();
710 LLT DstTy = MRI.getType(Dst);
711 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
713 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
714 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
715 auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
717 auto Flags = MI.getFlags();
718 auto Lo =
719 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
720 auto Hi =
721 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
722
723 B.buildMergeLikeInstr(Dst, {Lo, Hi});
724 MI.eraseFromParent();
725 return true;
726}
727
// lowerSplitTo32SExtInReg: lowers a 64-bit G_SEXT_INREG into two 32-bit
// halves. If the sign bit lies in the low word (Amt <= 32) the low half is
// sext-in-reg'd (freeze first; skipped entirely when Amt == 32) and the high
// half is its sign replicated via ashr by 31. Otherwise the low word passes
// through and only the high word is sext-in-reg'd by (Amt - 32).
// NOTE(review): scraped listing — the `Register Lo, Hi;` declaration is
// missing; restore from upstream before editing.
728bool RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
729 auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
730 int Amt = MI.getOperand(2).getImm();
732
733 if (Amt <= 32) {
734 auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
735 if (Amt == 32) {
// Whole low word already sign-correct; no in-reg extend needed.
736
737 Lo = Freeze.getReg(0);
738 } else {
739
740 Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
741 }
742
743 auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
744 Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
745 } else {
746
747 Lo = Op1.getReg(0);
748 Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
749 }
750
751 B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
752 MI.eraseFromParent();
753 return true;
754}
755
// lower: central dispatch over the mapping's lowering method. Delegates to
// the dedicated lowerXxx helpers or performs inline lowerings (uniform ext
// to select, 32->64 extends, constant splat, S1-to-VCC, load split/widen).
// Finally, if any source registers were flagged for waterfalling, wraps MI
// in a waterfall loop.
// NOTE(review): scraped listing — the switch head and nearly all case labels
// are missing (only the case bodies survive); restore from upstream.
756bool RegBankLegalizeHelper::lower(MachineInstr &MI,
758 SmallSet<Register, 4> &WaterfallSgprs) {
759
762 break;
764 return lowerVccExtToSel(MI);
766 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
// Uniform S1 ext: select between all-ones/-1 (sext) or 1 and 0 on SGPR.
767 auto True = B.buildConstant({SgprRB, Ty},
768 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
769 auto False = B.buildConstant({SgprRB, Ty}, 0);
770
771
772
773 B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
774 False);
775 MI.eraseFromParent();
776 return true;
777 }
779 return lowerUnpackBitShift(MI);
781 return lowerUnpackMinMax(MI);
783 return lowerSplitTo16(MI);
785 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
786 MachineInstrBuilder Hi;
787 switch (MI.getOpcode()) {
788 case AMDGPU::G_ZEXT: {
789 Hi = B.buildConstant({RB, S32}, 0);
790 break;
791 }
792 case AMDGPU::G_SEXT: {
// Replicate the sign bit of the low word into the high word.
793
794 auto ShiftAmt = B.buildConstant({RB, S32}, 31);
795 Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
796 break;
797 }
798 case AMDGPU::G_ANYEXT: {
799 Hi = B.buildUndef({RB, S32});
800 break;
801 }
802 default:
804 "AMDGPU RegBankLegalize: Ext32To64, unsuported opcode",
806 return false;
807 }
808
809 B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
810 {MI.getOperand(1).getReg(), Hi});
811 MI.eraseFromParent();
812 return true;
813 }
815 uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
816 B.buildConstant(MI.getOperand(0).getReg(), ConstVal);
817
818 MI.eraseFromParent();
819 return true;
820 }
822 Register Src = MI.getOperand(1).getReg();
823 LLT Ty = MRI.getType(Src);
824
// Normalize an S1-in-wider-reg boolean: keep only bit 0, then compare != 0.
825
826
827 Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
828 if (Ty == S64) {
829 auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
830 auto One = B.buildConstant(VgprRB_S32, 1);
831 auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
832 auto Zero = B.buildConstant(VgprRB_S32, 0);
833 auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
834 B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
835 } else {
836 assert(Ty == S32 || Ty == S16);
837 auto One = B.buildConstant({VgprRB, Ty}, 1);
838 B.buildAnd(BoolSrc, Src, One);
839 }
840 auto Zero = B.buildConstant({VgprRB, Ty}, 0);
841 B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
842 MI.eraseFromParent();
843 return true;
844 }
846 return lowerV_BFE(MI);
848 return lowerS_BFE(MI);
850 return lowerSplitTo32(MI);
852 return lowerSplitTo32Select(MI);
854 return lowerSplitTo32SExtInReg(MI);
856 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
858
859 if (Size > 128) {
864 } else {
866 }
867 if (Size / 128 == 2)
869 else if (Size / 128 == 4)
871 else {
873 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
875 return false;
876 }
877 }
878
879 else if (DstTy == S96)
880 splitLoad(MI, {S64, S32}, S32);
881 else if (DstTy == V3S32)
882 splitLoad(MI, {V2S32, S32}, S32);
883 else if (DstTy == V6S16)
884 splitLoad(MI, {V4S16, V2S16}, V2S16);
885 else {
887 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
889 return false;
890 }
891 return true;
892 }
894 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
895 if (DstTy == S96)
896 widenLoad(MI, S128);
897 else if (DstTy == V3S32)
898 widenLoad(MI, V4S32, S32);
899 else if (DstTy == V6S16)
900 widenLoad(MI, V8S16, V2S16);
901 else {
903 "AMDGPU RegBankLegalize: WidenLoad, unsuported type",
905 return false;
906 }
907 return true;
908 }
910 return lowerUnpackAExt(MI);
913 }
914
// Wrap MI in a waterfall loop if any SGPR-required operand was divergent.
915 if (!WaterfallSgprs.empty()) {
917 if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
918 return false;
919 }
920 return true;
921}
922
// Switch over a mapping-ID returning an LLT; presumably
// RegBankLegalizeHelper::getTyFromID (callers elsewhere use that name) —
// TODO confirm, the signature line is missing from this scraped listing,
// as are all case bodies between Vcc and default.
924 switch (ID) {
925 case Vcc:
980 default:
981 return LLT();
982 }
983}
984
// Switch mapping an ID plus a candidate type Ty to either Ty (when Ty
// satisfies the ID's constraint, e.g. pointer width checks via isAnyPtr)
// or an invalid LLT. Presumably RegBankLegalizeHelper::getBTyFromID —
// TODO confirm; the signature and all case labels are missing from this
// scraped listing.
986 switch (ID) {
992 return Ty;
993 return LLT();
996 return isAnyPtr(Ty, 32) ? Ty : LLT();
999 return isAnyPtr(Ty, 64) ? Ty : LLT();
1002 return isAnyPtr(Ty, 128) ? Ty : LLT();
1008 return Ty;
1009 return LLT();
1015 return Ty;
1016 return LLT();
1022 return Ty;
1023 return LLT();
1029 return Ty;
1030 return LLT();
1036 return Ty;
1037 return LLT();
1038 default:
1039 return LLT();
1040 }
1041}
1042
// getRegBankFromID: maps a mapping-ID to its register bank — VccRB for Vcc,
// SgprRB / VgprRB for the (elided) Sgpr*/Vgpr* ID groups, nullptr otherwise.
// NOTE(review): scraped listing — the qualified function name and the case
// labels for the SgprRB/VgprRB groups are missing; restore from upstream.
1043const RegisterBank *
1045 switch (ID) {
1046 case Vcc:
1047 return VccRB;
1089 return SgprRB;
1113 return VgprRB;
1114 default:
1115 return nullptr;
1116 }
1117}
1118
// applyMappingDst: walks MI's def operands and applies the per-operand
// mapping method. Exact-match IDs only assert bank/type; the remaining
// cases redirect the def to a freshly created register and emit glue
// (trunc/anyext/readanylane-style copies, judging by the surviving bodies)
// from the new def back to the original register.
// NOTE(review): scraped listing — SmallVectorImpl element type, the loop
// head, `Register Reg = ...` and most case labels are missing; restore
// from upstream before editing.
1119bool RegBankLegalizeHelper::applyMappingDst(
1120 MachineInstr &MI, unsigned &OpIdx,
1121 const SmallVectorImpl &MethodIDs) {
1122
1125 continue;
1126 MachineOperand &Op = MI.getOperand(OpIdx);
1128 LLT Ty = MRI.getType(Reg);
1129 [[maybe_unused]] const RegisterBank *RB = MRI.getRegBank(Reg);
1130
1131 switch (MethodIDs[OpIdx]) {
// Exact-match IDs: nothing to rewrite, just verify type and bank.
1132
1133 case Vcc:
1159 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1160 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1161 break;
1162 }
1163
1182 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1183 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1184 break;
1185 }
1186
// VCC def consumed as uniform SGPR bool: copy SCC<-VCC then trunc.
1189 assert(RB == SgprRB);
1190 Register NewDst = MRI.createVirtualRegister(VccRB_S1);
1191 Op.setReg(NewDst);
1192 auto CopyS32_Vcc =
1193 B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
1194 B.buildTrunc(Reg, CopyS32_Vcc);
1195 break;
1196 }
1198 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1199 assert(RB == SgprRB);
1200 Register NewVgprDstS16 = MRI.createVirtualRegister({VgprRB, S16});
1201 Register NewVgprDstS32 = MRI.createVirtualRegister({VgprRB, S32});
1202 Register NewSgprDstS32 = MRI.createVirtualRegister({SgprRB, S32});
1203 Op.setReg(NewVgprDstS16);
1204 B.buildAnyExt(NewVgprDstS32, NewVgprDstS16);
1206 B.buildTrunc(Reg, NewSgprDstS32);
1207 break;
1208 }
1213 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1214 assert(RB == SgprRB);
1215 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1216 Op.setReg(NewVgprDst);
1218 break;
1219 }
1226 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1227 assert(RB == SgprRB);
1228 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1229 Op.setReg(NewVgprDst);
1231 break;
1232 }
1233
1236 assert(RB == SgprRB);
1237 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1238 Op.setReg(NewDst);
1239 if (!MRI.use_empty(Reg))
1240 B.buildTrunc(Reg, NewDst);
1241 break;
1242 }
1245 MF, MORE, "amdgpu-regbanklegalize",
1246 "AMDGPU RegBankLegalize: missing fast rule ('Div' or 'Uni') for", MI);
1247 return false;
1248 }
1249 default:
1251 MF, MORE, "amdgpu-regbanklegalize",
1252 "AMDGPU RegBankLegalize: applyMappingDst, ID not supported", MI);
1253 return false;
1254 }
1255 }
1256
1257 return true;
1258}
1259
// applyMappingSrc: walks MI's use operands and applies the per-operand
// mapping method: verify-only for exact-match IDs, insert copies to VGPR,
// convert SGPR bools to VCC via G_AMDGPU_COPY_VCC_SCC, any/sign/zero-extend
// sub-32-bit scalars, or record the register for waterfalling when an SGPR
// is required but the value is not in the SGPR bank.
// NOTE(review): scraped listing — SmallVectorImpl element type,
// `Register Reg = ...` and most case labels are missing; restore upstream.
1260bool RegBankLegalizeHelper::applyMappingSrc(
1261 MachineInstr &MI, unsigned &OpIdx,
1262 const SmallVectorImpl &MethodIDs,
1263 SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
1264 for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
1265 if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
1266 continue;
1267
1268 MachineOperand &Op = MI.getOperand(OpIdx);
1270 LLT Ty = MRI.getType(Reg);
1271 const RegisterBank *RB = MRI.getRegBank(Reg);
1272
1273 switch (MethodIDs[i]) {
1274 case Vcc: {
// Uniform SGPR bool feeding a VCC use: anyext then copy SCC->VCC.
1276 assert(RB == VccRB || RB == SgprRB);
1277 if (RB == SgprRB) {
1278 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1279 auto CopyVcc_Scc =
1280 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
1281 Op.setReg(CopyVcc_Scc.getReg(0));
1282 }
1283 break;
1284 }
1285
1299 assert(Ty == getTyFromID(MethodIDs[i]));
1300 assert(RB == getRegBankFromID(MethodIDs[i]));
1301 break;
1302 }
1303
1313 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1314 assert(RB == getRegBankFromID(MethodIDs[i]));
1315 break;
1316 }
1317
// VGPR-required uses: copy into a VGPR unless already there.
1330 assert(Ty == getTyFromID(MethodIDs[i]));
1331 if (RB != VgprRB) {
1332 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1333 Op.setReg(CopyToVgpr.getReg(0));
1334 }
1335 break;
1336 }
1337
1347 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1348 if (RB != VgprRB) {
1349 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1350 Op.setReg(CopyToVgpr.getReg(0));
1351 }
1352 break;
1353 }
1354
// SGPR-required use that is not SGPR: defer to a waterfall loop.
1357 assert(Ty == getTyFromID(MethodIDs[i]));
1358 if (RB != SgprRB)
1359 SgprWaterfallOperandRegs.insert(Reg);
1360 break;
1361 }
1362
1364
1366 assert(RB == SgprRB);
1367 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1368 Op.setReg(Aext.getReg(0));
1369 break;
1370 }
1372
// Bool-in-reg: anyext then mask to bit 0 so upper bits are defined.
1374 assert(RB == SgprRB);
1375 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1376
1377
1378 auto Cst1 = B.buildConstant(SgprRB_S32, 1);
1379 auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
1380 Op.setReg(BoolInReg.getReg(0));
1381 break;
1382 }
1385 assert(RB == SgprRB);
1386 auto Sext = B.buildSExt(SgprRB_S32, Reg);
1387 Op.setReg(Sext.getReg(0));
1388 break;
1389 }
1392 assert(RB == SgprRB);
1393 auto Zext = B.buildZExt({SgprRB, S32}, Reg);
1394 Op.setReg(Zext.getReg(0));
1395 break;
1396 }
1398
1400 assert(RB == VgprRB);
1401 auto Sext = B.buildSExt({VgprRB, S32}, Reg);
1402 Op.setReg(Sext.getReg(0));
1403 break;
1404 }
1406
1408 assert(RB == VgprRB);
1409 auto Zext = B.buildZExt({VgprRB, S32}, Reg);
1410 Op.setReg(Zext.getReg(0));
1411 break;
1412 }
1413 default:
1415 MF, MORE, "amdgpu-regbanklegalize",
1416 "AMDGPU RegBankLegalize: applyMappingSrc, ID not supported", MI);
1417 return false;
1418 }
1419 }
1420 return true;
1421}
1422
// applyMappingPHI: legalizes a G_PHI. A uniform S1 phi is widened to an
// SGPR S32 phi (new def truncated back to the original S1; each incoming
// value any-extended to S32 at its def point). A divergent S1 phi is
// rejected with a remark — lane-mask merging is expected to have handled
// it earlier. Other (elided) cases pass through; unsupported types produce
// a remark and return false.
// NOTE(review): scraped listing — the function signature, UseReg
// declaration, insert-point code inside the loop and the fallthrough case
// are missing; restore from upstream before editing.
1424 Register Dst = MI.getOperand(0).getReg();
1425 LLT Ty = MRI.getType(Dst);
1426
1427 if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
1428 B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());
1429
1430 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1431 MI.getOperand(0).setReg(NewDst);
1432 B.buildTrunc(Dst, NewDst);
1433
// Incoming value operands sit at odd indices (value, block pairs).
1434 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1436
1437 auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
1439
1441
1442 auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
1443 MI.getOperand(i).setReg(NewUse.getReg(0));
1444 }
1445
1446 return true;
1447 }
1448
1449
1450
1451 if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
1454 "AMDGPU RegBankLegalize: Can't lower divergent S1 G_PHI",
1455 MI);
1456 return false;
1457 }
1458
1459
1460
1461
1464 return true;
1465 }
1466
1468 "AMDGPU RegBankLegalize: type not supported for G_PHI",
1469 MI);
1470 return false;
1471}
1472
// verifyRegBankOnOperands (file-local helper; head of the signature is
// missing from this scraped listing): returns true iff every register
// operand of MI in [StartOpIdx, EndOpIdx] is assigned the expected bank RB.
1476 unsigned StartOpIdx,
1477 unsigned EndOpIdx) {
1478 for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
1479 if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
1480 return false;
1481 }
1482 return true;
1483}
1484
// applyMappingTrivial: for instructions whose operands should all share the
// bank of the first def. The SGPR verification branch is elided in this
// scraped listing; for VGPR results, any non-VGPR use operand gets a copy
// into a VGPR of the same type.
// NOTE(review): the function signature and the SgprRB-branch body are
// missing from this scraped listing; restore from upstream before editing.
1486 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
1487
1488 unsigned NumDefs = MI.getNumDefs();
1489 unsigned NumOperands = MI.getNumOperands();
1490
1492 if (RB == SgprRB)
1494
1495 if (RB == VgprRB) {
1496 B.setInstr(MI);
1497 for (unsigned i = NumDefs; i < NumOperands; ++i) {
1498 Register Reg = MI.getOperand(i).getReg();
1499 if (MRI.getRegBank(Reg) != RB) {
1500 auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
1501 MI.getOperand(i).setReg(Copy.getReg(0));
1502 }
1503 }
1504 }
1505}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Provides AMDGPU specific target descriptions.
static bool isSignedBFE(MachineInstr &MI)
Definition AMDGPURegBankLegalizeHelper.cpp:560
static bool verifyRegBankOnOperands(MachineInstr &MI, const RegisterBank *RB, MachineRegisterInfo &MRI, unsigned StartOpIdx, unsigned EndOpIdx)
Definition AMDGPURegBankLegalizeHelper.cpp:1473
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
static Register UseReg(const MachineOperand &MO)
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Machine IR instance of the generic uniformity analysis.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
bool findRuleAndApplyMapping(MachineInstr &MI)
Definition AMDGPURegBankLegalizeHelper.cpp:42
bool applyMappingPHI(MachineInstr &MI)
Definition AMDGPURegBankLegalizeHelper.cpp:1423
void applyMappingTrivial(MachineInstr &MI)
Definition AMDGPURegBankLegalizeHelper.cpp:1485
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI, const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
Definition AMDGPURegBankLegalizeHelper.cpp:32
const RegBankLLTMapping * findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
const SIRegisterInfo * getRegisterInfo() const override
Represents a call to an intrinsic.
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
TypeSize getValue() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
Representation of each machine instruction.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
A range adaptor for a pair of iterators.
bool isAnyPtr(LLT Ty, unsigned Width)
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI void reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R)
Report an ISel error as a missed optimization remark to the LLVMContext's diagnostic stream.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LoweringMethodID LoweringMethod
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping