LLVM: lib/Target/AMDGPU/SIPeepholeSDWA.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
29#include
30
31using namespace llvm;
32
33#define DEBUG_TYPE "si-peephole-sdwa"
34
35STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
37 "Number of instruction converted to SDWA.");
38
39namespace {
40
43class SDWAOperand;
44class SDWADstOperand;
45
48
49class SIPeepholeSDWA {
50private:
54
56 SDWAOperandsMap PotentialMatches;
58
59 std::optional<int64_t> foldToImm(const MachineOperand &Op) const;
60
62 std::unique_ptr matchSDWAOperand(MachineInstr &MI);
67 bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
69
70public:
72};
73
75public:
76 static char ID;
77
78 SIPeepholeSDWALegacy() : MachineFunctionPass(ID) {}
79
80 StringRef getPassName() const override { return "SI Peephole SDWA"; }
81
82 bool runOnMachineFunction(MachineFunction &MF) override;
83
84 void getAnalysisUsage(AnalysisUsage &AU) const override {
87 }
88};
89
91
92class SDWAOperand {
93private:
94 MachineOperand *Target;
95 MachineOperand *Replaced;
96
97
98
99 virtual bool canCombineSelections(const MachineInstr &MI,
100 const SIInstrInfo *TII) = 0;
101
102public:
103 SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
104 : Target(TargetOp), Replaced(ReplacedOp) {
105 assert(Target->isReg());
106 assert(Replaced->isReg());
107 }
108
109 virtual ~SDWAOperand() = default;
110
111 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII,
112 const GCNSubtarget &ST,
113 SDWAOperandsMap *PotentialMatches = nullptr) = 0;
114 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
115
116 MachineOperand *getTargetOperand() const { return Target; }
117 MachineOperand *getReplacedOperand() const { return Replaced; }
118 MachineInstr *getParentInst() const { return Target->getParent(); }
119
120 MachineRegisterInfo *getMRI() const {
121 return &getParentInst()->getMF()->getRegInfo();
122 }
123
124#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
125 virtual void print(raw_ostream& OS) const = 0;
127#endif
128};
129
130class SDWASrcOperand : public SDWAOperand {
131private:
133 bool Abs;
134 bool Neg;
135 bool Sext;
136
137public:
138 SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
139 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
140 bool Sext_ = false)
141 : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
142 Neg(Neg_), Sext(Sext_) {}
143
144 MachineInstr *potentialToConvert(const SIInstrInfo *TII,
145 const GCNSubtarget &ST,
146 SDWAOperandsMap *PotentialMatches = nullptr) override;
147 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
148 bool canCombineSelections(const MachineInstr &MI,
149 const SIInstrInfo *TII) override;
150
151 SdwaSel getSrcSel() const { return SrcSel; }
152 bool getAbs() const { return Abs; }
153 bool getNeg() const { return Neg; }
154 bool getSext() const { return Sext; }
155
156 uint64_t getSrcMods(const SIInstrInfo *TII,
157 const MachineOperand *SrcOp) const;
158
159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
160 void print(raw_ostream& OS) const override;
161#endif
162};
163
164class SDWADstOperand : public SDWAOperand {
165private:
168
169public:
170 SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
172 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
173
174 MachineInstr *potentialToConvert(const SIInstrInfo *TII,
175 const GCNSubtarget &ST,
176 SDWAOperandsMap *PotentialMatches = nullptr) override;
177 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
178 bool canCombineSelections(const MachineInstr &MI,
179 const SIInstrInfo *TII) override;
180
181 SdwaSel getDstSel() const { return DstSel; }
182 DstUnused getDstUnused() const { return DstUn; }
183
184#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
185 void print(raw_ostream& OS) const override;
186#endif
187};
188
189class SDWADstPreserveOperand : public SDWADstOperand {
190private:
191 MachineOperand *Preserve;
192
193public:
194 SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
195 MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
196 : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
197 Preserve(PreserveOp) {}
198
199 bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
200 bool canCombineSelections(const MachineInstr &MI,
201 const SIInstrInfo *TII) override;
202
203 MachineOperand *getPreservedOperand() const { return Preserve; }
204
205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
206 void print(raw_ostream& OS) const override;
207#endif
208};
209
210}
211
213 false)
214
215char SIPeepholeSDWALegacy::ID = 0;
216
218
220 return new SIPeepholeSDWALegacy();
221}
222
223#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
225 switch(Sel) {
226 case BYTE_0: OS << "BYTE_0"; break;
227 case BYTE_1: OS << "BYTE_1"; break;
228 case BYTE_2: OS << "BYTE_2"; break;
229 case BYTE_3: OS << "BYTE_3"; break;
230 case WORD_0: OS << "WORD_0"; break;
231 case WORD_1: OS << "WORD_1"; break;
232 case DWORD: OS << "DWORD"; break;
233 }
234 return OS;
235}
236
238 switch(Un) {
239 case UNUSED_PAD: OS << "UNUSED_PAD"; break;
240 case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
242 }
243 return OS;
244}
245
247void SDWASrcOperand::print(raw_ostream& OS) const {
248 OS << "SDWA src: " << *getTargetOperand()
249 << " src_sel:" << getSrcSel()
250 << " abs:" << getAbs() << " neg:" << getNeg()
251 << " sext:" << getSext() << '\n';
252}
253
255void SDWADstOperand::print(raw_ostream& OS) const {
256 OS << "SDWA dst: " << *getTargetOperand()
257 << " dst_sel:" << getDstSel()
258 << " dst_unused:" << getDstUnused() << '\n';
259}
260
262void SDWADstPreserveOperand::print(raw_ostream& OS) const {
263 OS << "SDWA preserve dst: " << *getTargetOperand()
264 << " dst_sel:" << getDstSel()
265 << " preserve:" << *getPreservedOperand() << '\n';
266}
267
268#endif
269
275 if (To.isUse()) {
277 } else {
279 }
280}
281
283 return LHS.isReg() &&
284 RHS.isReg() &&
285 LHS.getReg() == RHS.getReg() &&
286 LHS.getSubReg() == RHS.getSubReg();
287}
288
291 if (->isReg() ||
->isDef())
292 return nullptr;
293
294 return MRI->getOneNonDBGUse(Reg->getReg());
295}
296
299 if (->isReg())
300 return nullptr;
301
302 return MRI->getOneDef(Reg->getReg());
303}
304
305
306
307
308
309
310
312 if (Sel == SdwaSel::DWORD)
313 return OperandSel;
314
315 if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
316 return Sel;
317
318 if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
319 Sel == SdwaSel::BYTE_3)
320 return {};
321
322 if (OperandSel == SdwaSel::WORD_0)
323 return Sel;
324
325 if (OperandSel == SdwaSel::WORD_1) {
326 if (Sel == SdwaSel::BYTE_0)
327 return SdwaSel::BYTE_2;
328 if (Sel == SdwaSel::BYTE_1)
329 return SdwaSel::BYTE_3;
330 if (Sel == SdwaSel::WORD_0)
331 return SdwaSel::WORD_1;
332 }
333
334 return {};
335}
336
337uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
338 const MachineOperand *SrcOp) const {
339 uint64_t Mods = 0;
341 if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
342 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
343 Mods = Mod->getImm();
344 }
345 } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
346 if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
347 Mods = Mod->getImm();
348 }
349 }
350 if (Abs || Neg) {
352 "Float and integer src modifiers can't be set simultaneously");
355 } else if (Sext) {
357 }
358
359 return Mods;
360}
361
362MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
363 const GCNSubtarget &ST,
364 SDWAOperandsMap *PotentialMatches) {
365 if (PotentialMatches != nullptr) {
366
367 MachineOperand *Reg = getReplacedOperand();
368 if (->isReg() ||
->isDef())
369 return nullptr;
370
371 for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg()))
372
373 if (!isConvertibleToSDWA(UseMI, ST, TII) ||
374 !canCombineSelections(UseMI, TII))
375 return nullptr;
376
377
378
379 for (MachineOperand &UseMO : getMRI()->use_nodbg_operands(Reg->getReg())) {
380
382
383 SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;
384 MachineInstr *UseMI = UseMO.getParent();
385 potentialMatchesMap[UseMI].push_back(this);
386 }
387 return nullptr;
388 }
389
390
391
392 MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
393 if (!PotentialMO)
394 return nullptr;
395
396 MachineInstr *Parent = PotentialMO->getParent();
397
398 return canCombineSelections(*Parent, TII) ? Parent : nullptr;
399}
400
401bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
402 switch (MI.getOpcode()) {
403 case AMDGPU::V_CVT_F32_FP8_sdwa:
404 case AMDGPU::V_CVT_F32_BF8_sdwa:
405 case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
406 case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
407
408 return false;
409 case AMDGPU::V_CNDMASK_B32_sdwa:
410
411
412
413
414
415
416
417
418
419
420
421 if (Sext)
422 return false;
423 break;
424 }
425
426
427
428 bool IsPreserveSrc = false;
429 MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
430 MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
431 MachineOperand *SrcMods =
432 TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
433 assert(Src && (Src->isReg() || Src->isImm()));
434 if ((*Src, *getReplacedOperand())) {
435
436 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
437 SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
438 SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
439
440 if (!Src ||
441 (*Src, *getReplacedOperand())) {
442
443
444
445
446
447
448 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
450 TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
451
452 if (Dst &&
453 DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
454
455
456
457
459 TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
460 if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
461 getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
462 IsPreserveSrc = true;
463 auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
464 AMDGPU::OpName::vdst);
465 auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
466 Src = &MI.getOperand(TiedIdx);
467 SrcSel = nullptr;
468 SrcMods = nullptr;
469 } else {
470
471 return false;
472 }
473 }
474 }
475 assert(Src && Src->isReg());
476
477 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
478 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
479 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
480 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
481 (*Src, *getReplacedOperand())) {
482
483
484 return false;
485 }
486
488 (IsPreserveSrc || (SrcSel && SrcMods)));
489 }
491 if (!IsPreserveSrc) {
494 SrcMods->setImm(getSrcMods(TII, Src));
495 }
496 getTargetOperand()->setIsKill(false);
497 return true;
498}
499
500
501
503 AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
505
508
510}
511
512
513
514
516 AMDGPU::OpName SrcOpName,
520
523 return true;
524
526}
527
528bool SDWASrcOperand::canCombineSelections(const MachineInstr &MI,
529 const SIInstrInfo *TII) {
530 if (->isSDWA(MI.getOpcode()))
531 return true;
532
533 using namespace AMDGPU;
534
536 getReplacedOperand(), getSrcSel()) &&
538 getReplacedOperand(), getSrcSel());
539}
540
541MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
542 const GCNSubtarget &ST,
543 SDWAOperandsMap *PotentialMatches) {
544
545
546 MachineRegisterInfo *MRI = getMRI();
547 MachineInstr *ParentMI = getParentInst();
548
549 MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
550 if (!PotentialMO)
551 return nullptr;
552
553
554 for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
555 if (&UseInst != ParentMI)
556 return nullptr;
557 }
558
559 MachineInstr *Parent = PotentialMO->getParent();
560 return canCombineSelections(*Parent, TII) ? Parent : nullptr;
561}
562
563bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
564
565
566 if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
567 MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
568 MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
569 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
571
572 return false;
573 }
574
575 MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
577 Operand->isReg() &&
578 isSameReg(*Operand, *getReplacedOperand()));
580 MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
582
585
586 MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
588 DstUnused->setImm(getDstUnused());
589
590
591
592 getParentInst()->eraseFromParent();
593 return true;
594}
595
596bool SDWADstOperand::canCombineSelections(const MachineInstr &MI,
597 const SIInstrInfo *TII) {
598 if (->isSDWA(MI.getOpcode()))
599 return true;
600
602}
603
604bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
605 const SIInstrInfo *TII) {
606
607
608
609 for (MachineOperand &MO : MI.uses()) {
610 if (!MO.isReg())
611 continue;
612 getMRI()->clearKillFlags(MO.getReg());
613 }
614
615
616 MI.getParent()->remove(&MI);
617 getParentInst()->getParent()->insert(getParentInst(), &MI);
618
619
620 MachineInstrBuilder MIB(*MI.getMF(), MI);
621 MIB.addReg(getPreservedOperand()->getReg(),
623 getPreservedOperand()->getSubReg());
624
625
626 MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
627 MI.getNumOperands() - 1);
628
629
630 return SDWADstOperand::convertToSDWA(MI, TII);
631}
632
633bool SDWADstPreserveOperand::canCombineSelections(const MachineInstr &MI,
634 const SIInstrInfo *TII) {
635 return SDWADstOperand::canCombineSelections(MI, TII);
636}
637
638std::optional<int64_t>
639SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
640 if (Op.isImm()) {
641 return Op.getImm();
642 }
643
644
645
646 if (Op.isReg()) {
647 for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
649 continue;
650
651 const MachineInstr *DefInst = Def.getParent();
652 if (->isFoldableCopy(*DefInst))
653 return std::nullopt;
654
655 const MachineOperand &Copied = DefInst->getOperand(1);
656 if (!Copied.isImm())
657 return std::nullopt;
658
659 return Copied.getImm();
660 }
661 }
662
663 return std::nullopt;
664}
665
666std::unique_ptr
667SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
668 unsigned Opcode = MI.getOpcode();
669 switch (Opcode) {
670 case AMDGPU::V_LSHRREV_B32_e32:
671 case AMDGPU::V_ASHRREV_I32_e32:
672 case AMDGPU::V_LSHLREV_B32_e32:
673 case AMDGPU::V_LSHRREV_B32_e64:
674 case AMDGPU::V_ASHRREV_I32_e64:
675 case AMDGPU::V_LSHLREV_B32_e64: {
676
677
678
679
680
681
682
683
684 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
685 auto Imm = foldToImm(*Src0);
686 if (!Imm)
687 break;
688
689 if (*Imm != 16 && *Imm != 24)
690 break;
691
692 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
693 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
695 Dst->getReg().isPhysical())
696 break;
697
698 if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
699 Opcode == AMDGPU::V_LSHLREV_B32_e64) {
700 return std::make_unique(
702 }
703 return std::make_unique(
704 Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
705 Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
706 Opcode != AMDGPU::V_LSHRREV_B32_e64);
707 break;
708 }
709
710 case AMDGPU::V_LSHRREV_B16_e32:
711 case AMDGPU::V_ASHRREV_I16_e32:
712 case AMDGPU::V_LSHLREV_B16_e32:
713 case AMDGPU::V_LSHRREV_B16_e64:
714 case AMDGPU::V_LSHRREV_B16_opsel_e64:
715 case AMDGPU::V_ASHRREV_I16_e64:
716 case AMDGPU::V_LSHLREV_B16_opsel_e64:
717 case AMDGPU::V_LSHLREV_B16_e64: {
718
719
720
721
722
723
724
725
726 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
727 auto Imm = foldToImm(*Src0);
728 if (!Imm || *Imm != 8)
729 break;
730
731 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
732 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
733
735 Dst->getReg().isPhysical())
736 break;
737
738 if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
739 Opcode == AMDGPU::V_LSHLREV_B16_opsel_e64 ||
740 Opcode == AMDGPU::V_LSHLREV_B16_e64)
741 return std::make_unique(Dst, Src1, BYTE_1, UNUSED_PAD);
742 return std::make_unique(
743 Src1, Dst, BYTE_1, false, false,
744 Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
745 Opcode != AMDGPU::V_LSHRREV_B16_opsel_e64 &&
746 Opcode != AMDGPU::V_LSHRREV_B16_e64);
747 break;
748 }
749
750 case AMDGPU::V_BFE_I32_e64:
751 case AMDGPU::V_BFE_U32_e64: {
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
767 auto Offset = foldToImm(*Src1);
769 break;
770
771 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
772 auto Width = foldToImm(*Src2);
773 if (!Width)
774 break;
775
777
778 if (*Offset == 0 && *Width == 8)
780 else if (*Offset == 0 && *Width == 16)
782 else if (*Offset == 0 && *Width == 32)
784 else if (*Offset == 8 && *Width == 8)
786 else if (*Offset == 16 && *Width == 8)
788 else if (*Offset == 16 && *Width == 16)
790 else if (*Offset == 24 && *Width == 8)
792 else
793 break;
794
795 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
796 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
797
799 Dst->getReg().isPhysical())
800 break;
801
802 return std::make_unique(
803 Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
804 }
805
806 case AMDGPU::V_AND_B32_e32:
807 case AMDGPU::V_AND_B32_e64: {
808
809
810
811
812 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
813 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
814 auto *ValSrc = Src1;
815 auto Imm = foldToImm(*Src0);
816
817 if (!Imm) {
818 Imm = foldToImm(*Src1);
819 ValSrc = Src0;
820 }
821
822 if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
823 break;
824
825 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
826
827 if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||
828 Dst->getReg().isPhysical())
829 break;
830
831 return std::make_unique(
832 ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
833 }
834
835 case AMDGPU::V_OR_B32_e32:
836 case AMDGPU::V_OR_B32_e64: {
837
838
839
840
841
842
843
844
845
846 using CheckRetType =
847 std::optional<std::pair<MachineOperand *, MachineOperand *>>;
848 auto CheckOROperandsForSDWA =
849 [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
850 if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
851 return CheckRetType(std::nullopt);
852
854 if (!Op1Def)
855 return CheckRetType(std::nullopt);
856
857 MachineInstr *Op1Inst = Op1Def->getParent();
858 if (->isSDWA(*Op1Inst))
859 return CheckRetType(std::nullopt);
860
862 if (!Op2Def)
863 return CheckRetType(std::nullopt);
864
865 return CheckRetType(std::pair(Op1Def, Op2Def));
866 };
867
868 MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
869 MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
870 assert(OrSDWA && OrOther);
871 auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
872 if (!Res) {
873 OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
874 OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
875 assert(OrSDWA && OrOther);
876 Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
877 if (!Res)
878 break;
879 }
880
881 MachineOperand *OrSDWADef = Res->first;
882 MachineOperand *OrOtherDef = Res->second;
883 assert(OrSDWADef && OrOtherDef);
884
885 MachineInstr *SDWAInst = OrSDWADef->getParent();
886 MachineInstr *OtherInst = OrOtherDef->getParent();
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908 if (->isSDWA(*OtherInst))
909 break;
910
912 TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
914 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
915
916 bool DstSelAgree = false;
917 switch (DstSel) {
918 case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
919 (OtherDstSel == BYTE_3) ||
920 (OtherDstSel == WORD_1));
921 break;
922 case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
923 (OtherDstSel == BYTE_1) ||
924 (OtherDstSel == WORD_0));
925 break;
926 case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
927 (OtherDstSel == BYTE_2) ||
928 (OtherDstSel == BYTE_3) ||
929 (OtherDstSel == WORD_1));
930 break;
931 case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
932 (OtherDstSel == BYTE_2) ||
933 (OtherDstSel == BYTE_3) ||
934 (OtherDstSel == WORD_1));
935 break;
936 case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
937 (OtherDstSel == BYTE_1) ||
938 (OtherDstSel == BYTE_3) ||
939 (OtherDstSel == WORD_0));
940 break;
941 case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
942 (OtherDstSel == BYTE_1) ||
943 (OtherDstSel == BYTE_2) ||
944 (OtherDstSel == WORD_0));
945 break;
946 default: DstSelAgree = false;
947 }
948
949 if (!DstSelAgree)
950 break;
951
952
954 TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
955 if (OtherDstUnused != DstUnused::UNUSED_PAD)
956 break;
957
958
959 MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
961
962 return std::make_unique(
963 OrDst, OrSDWADef, OrOtherDef, DstSel);
964
965 }
966 }
967
968 return std::unique_ptr(nullptr);
969}
970
971#if !defined(NDEBUG)
973 Operand.print(OS);
974 return OS;
975}
976#endif
977
978void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
979 for (MachineInstr &MI : MBB) {
980 if (auto Operand = matchSDWAOperand(MI)) {
981 LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
982 SDWAOperands[&MI] = std::move(Operand);
983 ++NumSDWAPatternsFound;
984 }
985 }
986}
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
1007 const GCNSubtarget &ST) const {
1008 int Opc = MI.getOpcode();
1009 assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
1010 "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
1011
1012
1013 if (->canShrink(MI, *MRI))
1014 return;
1016
1017 const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
1018 if (!Sdst)
1019 return;
1021 if (!NextOp)
1022 return;
1023 MachineInstr &MISucc = *NextOp->getParent();
1024
1025
1026 MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
1027 if (!CarryIn)
1028 return;
1029 MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
1030 if (!CarryOut)
1031 return;
1032 if (->hasOneNonDBGUse(CarryIn->getReg()) ||
1033 ->use_nodbg_empty(CarryOut->getReg()))
1034 return;
1035
1036 MachineBasicBlock &MBB = *MI.getParent();
1040 return;
1041
1042 for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
1044 if (I->modifiesRegister(AMDGPU::VCC, TRI))
1045 return;
1046 }
1047
1048
1050 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
1051 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
1052 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
1054
1055 MI.eraseFromParent();
1056
1057
1058
1060}
1061
1062
1063
1064
1065
1066
1067void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,
1068 const GCNSubtarget &ST) const {
1069 assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);
1070
1072 if (->canShrink(MI, *MRI)) {
1073 LLVM_DEBUG(dbgs() << "Cannot shrink instruction\n");
1074 return;
1075 }
1076
1077 const MachineOperand &CarryIn =
1078 *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1080 MachineInstr *CarryDef = MRI->getVRegDef(CarryReg);
1081 if (!CarryDef) {
1082 LLVM_DEBUG(dbgs() << "Missing carry-in operand definition\n");
1083 return;
1084 }
1085
1086
1087 MCRegister Vcc = TRI->getVCC();
1088 MachineBasicBlock &MBB = *MI.getParent();
1092 LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");
1093 return;
1094 }
1095
1097
1100 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
1101 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
1102 .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
1104 TII->fixImplicitOperands(*Converted);
1105 LLVM_DEBUG(dbgs() << "Converted to VOP2: " << *Converted);
1106 (void)Converted;
1107 MI.eraseFromParent();
1108}
1109
1110namespace {
1111bool isConvertibleToSDWA(MachineInstr &MI,
1112 const GCNSubtarget &ST,
1113 const SIInstrInfo* TII) {
1114
1115 unsigned Opc = MI.getOpcode();
1117 return true;
1118
1119
1120
1121 if (Opc == AMDGPU::V_CNDMASK_B32_e64)
1122 return false;
1123
1124
1127
1129 return false;
1130
1131 if (.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1132 return false;
1133
1135 if (.hasSDWASdst()) {
1136 const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
1137 if (SDst && (SDst->getReg() != AMDGPU::VCC &&
1138 SDst->getReg() != AMDGPU::VCC_LO))
1139 return false;
1140 }
1141
1142 if (.hasSDWAOutModsVOPC() &&
1143 (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
1144 TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
1145 return false;
1146
1147 } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
1148 ->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
1149 return false;
1150 }
1151
1152 if (.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
1153 Opc == AMDGPU::V_FMAC_F32_e32 ||
1154 Opc == AMDGPU::V_MAC_F16_e32 ||
1155 Opc == AMDGPU::V_MAC_F32_e32))
1156 return false;
1157
1158
1159 if (TII->pseudoToMCOpcode(Opc) == -1)
1160 return false;
1161
1162 if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
1163 if (!Src0->isReg() && !Src0->isImm())
1164 return false;
1165 }
1166
1167 if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
1168 if (!Src1->isReg() && !Src1->isImm())
1169 return false;
1170 }
1171
1172 return true;
1173}
1174}
1175
1176MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
1177 unsigned Opcode = MI.getOpcode();
1179
1181 if (SDWAOpcode == -1)
1183 assert(SDWAOpcode != -1);
1184
1185 const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
1186
1187
1188 MachineInstrBuilder SDWAInst =
1189 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
1191
1192
1193 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
1194 if (Dst) {
1196 SDWAInst.add(*Dst);
1197 } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
1199 SDWAInst.add(*Dst);
1200 } else {
1203 }
1204
1205
1206
1207 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
1210 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
1212 else
1214 SDWAInst.add(*Src0);
1215
1216
1217 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
1218 if (Src1) {
1221 if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
1223 else
1225 SDWAInst.add(*Src1);
1226 }
1227
1228 if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
1229 SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
1230 SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
1231 SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
1232
1233 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1235 SDWAInst.add(*Src2);
1236 }
1237
1238
1240 MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
1241 if (Clamp) {
1242 SDWAInst.add(*Clamp);
1243 } else {
1245 }
1246
1247
1249 MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
1250 if (OMod) {
1251 SDWAInst.add(*OMod);
1252 } else {
1254 }
1255 }
1256
1257
1259 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1260
1262 SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
1263
1265 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1266
1267 if (Src1) {
1269 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
1270 }
1271
1272
1273 MachineInstr *Ret = SDWAInst.getInstr();
1274 TII->fixImplicitOperands(*Ret);
1275 return Ret;
1276}
1277
1278bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
1279 const SDWAOperandsVector &SDWAOperands) {
1281
1282 MachineInstr *SDWAInst;
1283 if (TII->isSDWA(MI.getOpcode())) {
1284
1285
1286
1287 SDWAInst = MI.getMF()->CloneMachineInstr(&MI);
1288 MI.getParent()->insert(MI.getIterator(), SDWAInst);
1289 } else {
1290 SDWAInst = createSDWAVersion(MI);
1291 }
1292
1293
1294 bool Converted = false;
1295 for (auto &Operand : SDWAOperands) {
1296 LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307 if (PotentialMatches.count(Operand->getParentInst()) == 0)
1308 Converted |= Operand->convertToSDWA(*SDWAInst, TII);
1309 }
1310
1311 if (!Converted) {
1313 return false;
1314 }
1315
1316 ConvertedInstructions.push_back(SDWAInst);
1317 for (MachineOperand &MO : SDWAInst->uses()) {
1318 if (!MO.isReg())
1319 continue;
1320
1321 MRI->clearKillFlags(MO.getReg());
1322 }
1323 LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
1324 ++NumSDWAInstructionsPeepholed;
1325
1326 MI.eraseFromParent();
1327 return true;
1328}
1329
1330
1331
1332void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
1333 const GCNSubtarget &ST) const {
1334 const MCInstrDesc &Desc = TII->get(MI.getOpcode());
1335 unsigned ConstantBusCount = 0;
1336 for (MachineOperand &Op : MI.explicit_uses()) {
1337 if (Op.isReg()) {
1338 if (TRI->isVGPR(*MRI, Op.getReg()))
1339 continue;
1340
1341 if (ST.hasSDWAScalar() && ConstantBusCount == 0) {
1342 ++ConstantBusCount;
1343 continue;
1344 }
1345 } else if (.isImm())
1346 continue;
1347
1348 unsigned I = Op.getOperandNo();
1350 if (!OpRC || ->isVSSuperClass(OpRC))
1351 continue;
1352
1353 Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1354 auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
1355 TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
1356 if (Op.isImm())
1357 Copy.addImm(Op.getImm());
1358 else if (Op.isReg())
1360 Op.getSubReg());
1361 Op.ChangeToRegister(VGPR, false);
1362 }
1363}
1364
1365bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) {
1367 return false;
1368
1369 return SIPeepholeSDWA().run(MF);
1370}
1371
1372bool SIPeepholeSDWA::run(MachineFunction &MF) {
1373 const GCNSubtarget &ST = MF.getSubtarget();
1374
1375 if (.hasSDWA())
1376 return false;
1377
1379 TRI = ST.getRegisterInfo();
1381
1382
1383 bool Ret = false;
1384 for (MachineBasicBlock &MBB : MF) {
1386 do {
1387
1388
1389
1390
1391 matchSDWAOperands(MBB);
1392 for (const auto &OperandPair : SDWAOperands) {
1393 const auto &Operand = OperandPair.second;
1394 MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);
1395 if (!PotentialMI)
1396 continue;
1397
1398 switch (PotentialMI->getOpcode()) {
1399 case AMDGPU::V_ADD_CO_U32_e64:
1400 case AMDGPU::V_SUB_CO_U32_e64:
1401 pseudoOpConvertToVOP2(*PotentialMI, ST);
1402 break;
1403 case AMDGPU::V_CNDMASK_B32_e64:
1404 convertVcndmaskToVOP2(*PotentialMI, ST);
1405 break;
1406 };
1407 }
1408 SDWAOperands.clear();
1409
1410
1411 matchSDWAOperands(MBB);
1412
1413 for (const auto &OperandPair : SDWAOperands) {
1414 const auto &Operand = OperandPair.second;
1415 MachineInstr *PotentialMI =
1416 Operand->potentialToConvert(TII, ST, &PotentialMatches);
1417
1418 if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII))
1419 PotentialMatches[PotentialMI].push_back(Operand.get());
1420 }
1421
1422 for (auto &PotentialPair : PotentialMatches) {
1423 MachineInstr &PotentialMI = *PotentialPair.first;
1424 convertToSDWA(PotentialMI, PotentialPair.second);
1425 }
1426
1427 PotentialMatches.clear();
1428 SDWAOperands.clear();
1429
1431
1433 Ret = true;
1434 while (!ConvertedInstructions.empty())
1435 legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
1437 }
1438
1439 return Ret;
1440}
1441
1446
1449 return PA;
1450}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static MachineOperand * findSingleRegDef(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition SIPeepholeSDWA.cpp:297
static void copyRegOperand(MachineOperand &To, const MachineOperand &From)
Definition SIPeepholeSDWA.cpp:270
static MachineOperand * findSingleRegUse(const MachineOperand *Reg, const MachineRegisterInfo *MRI)
Definition SIPeepholeSDWA.cpp:289
static std::optional< SdwaSel > combineSdwaSel(SdwaSel Sel, SdwaSel OperandSel)
Combine an SDWA instruction's existing SDWA selection Sel with the SDWA selection OperandSel of its operand.
Definition SIPeepholeSDWA.cpp:311
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS)
Definition SIPeepholeSDWA.cpp:282
static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII, AMDGPU::OpName SrcSelOpName, SdwaSel OpSel)
Verify that the SDWA selection operand SrcSelOpName of the SDWA instruction MI can be combined with t...
Definition SIPeepholeSDWA.cpp:502
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptNone() const
Do not optimize this function (-O0).
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class implements a map that also provides access to all stored values in a deterministic order.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition SIPeepholeSDWA.cpp:1442
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum) const
Given a machine instruction descriptor, returns the register class constraint for OpNum,...
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int getSDWAOp(uint16_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Define
Register definition.
@ Kill
The last use of a register.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createSIPeepholeSDWALegacyPass()
Definition SIPeepholeSDWA.cpp:219
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
char & SIPeepholeSDWALegacyID