LLVM: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Source File
29#include "llvm/IR/IntrinsicsAMDGPU.h"
30#include <optional>
31
32#define DEBUG_TYPE "amdgpu-isel"
33
34using namespace llvm;
35using namespace MIPatternMatch;
36
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
41#undef AMDGPUSubtarget
42
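// The #define/#include/#undef sequence above is the standard GlobalISel
// TableGen idiom: GET_GLOBALISEL_IMPL splices the generated selectImpl()
// matcher body from AMDGPUGenGlobalISel.inc into this file, and temporarily
// rebinding AMDGPUSubtarget to GCNSubtarget points the generated feature
// predicates at the GCN subtarget class. The constructor below re-includes
// the same .inc under other GET_GLOBALISEL_* init macros (not shown here) to
// initialize the generated predicate and temporary state.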
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
47 STI(STI),
49#include "AMDGPUGenGlobalISel.inc"
52#include "AMDGPUGenGlobalISel.inc"
54{
55}
56
58
67}
68
69
71 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
72 ? Def->getOperand(1).getReg()
74}
75
76bool AMDGPUInstructionSelector::isVCC(Register Reg,
78
79 if (Reg.isPhysical())
80 return false;
81
82 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
84 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
85 if (RC) {
86 const LLT Ty = MRI.getType(Reg);
88 return false;
89
90 return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
92 }
93
94 const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
95 return RB->getID() == AMDGPU::VCCRegBankID;
96}
97
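// The routines below branch on isVCC() to decide how an s1 value is
// materialized: a VCC-bank register is a wave-wide lane mask, anything else
// is a per-lane scalar bit. Roughly:
//   if (isVCC(CondReg, *MRI))
//     // lane mask: V_CNDMASK_B32 / S_CBRANCH_VCCNZ style selection
//   else
//     // SGPR bit: S_CSELECT_B32 / S_CBRANCH_SCC1 style selection
// (see selectG_SELECT and selectG_BRCOND later in this file).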
98bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
99 unsigned NewOpc) const {
100 MI.setDesc(TII.get(NewOpc));
101 MI.removeOperand(1);
103
106
107
109 return false;
110
115 if (!DstRC || DstRC != SrcRC)
116 return false;
117
120}
121
122bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
125 I.setDesc(TII.get(TargetOpcode::COPY));
126
129 Register DstReg = Dst.getReg();
130 Register SrcReg = Src.getReg();
131
132 if (isVCC(DstReg, *MRI)) {
133 if (SrcReg == AMDGPU::SCC) {
136 if (!RC)
137 return true;
139 }
140
141 if (!isVCC(SrcReg, *MRI)) {
142
144 return false;
145
148
149 std::optional<ValueAndVReg> ConstVal =
151 if (ConstVal) {
152 unsigned MovOpc =
153 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
154 BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
155 .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
156 } else {
157 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
158
159
160
161
162
165 const int64_t NoMods = 0;
166 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
172 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
178 } else {
180 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
181 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
184 if (IsSGPR)
185 And.setOperandDead(3);
186
187 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
190 }
191 }
192
193 if (!MRI->getRegClassOrNull(SrcReg))
194 MRI->setRegClass(SrcReg, SrcRC);
195 I.eraseFromParent();
196 return true;
197 }
198
202 return false;
203
204 return true;
205 }
206
208 if (MO.getReg().isPhysical())
209 continue;
210
213 if (!RC)
214 continue;
216 }
217 return true;
218}
219
220bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
221 const Register DefReg = I.getOperand(0).getReg();
222 const LLT DefTy = MRI->getType(DefReg);
223
224
225
226
227
229 return false;
230
231
232
234 MRI->getRegClassOrRegBank(DefReg);
235
237 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
238 if (!DefRC) {
240 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
241 return false;
242 }
243
244 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
246 if (!DefRC) {
247 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
248 return false;
249 }
250 }
251
252
253 I.setDesc(TII.get(TargetOpcode::PHI));
255}
256
258AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
260 unsigned SubIdx) const {
261
264 Register DstReg = MRI->createVirtualRegister(&SubRC);
265
266 if (MO.isReg()) {
267 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
269 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
270 .addReg(Reg, 0, ComposedSubIdx);
271
276 }
277
279
281
282 switch (SubIdx) {
283 default:
284 llvm_unreachable("do not know how to split immediate with this sub index.");
285 case AMDGPU::sub0:
287 case AMDGPU::sub1:
289 }
290}
291
293 switch (Opc) {
294 case AMDGPU::G_AND:
295 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
296 case AMDGPU::G_OR:
297 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
298 case AMDGPU::G_XOR:
299 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
300 default:
302 }
303}
304
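// For example, a 64-bit G_XOR maps to S_XOR_B64 and a 32-bit one to
// S_XOR_B32; selectG_AND_OR_XOR below picks the 64-bit form when the result
// is wider than 32 bits or is a wave-wide lane mask on wave64 targets.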
305bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
306 Register DstReg = I.getOperand(0).getReg();
308
310 if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
311 DstRB->getID() != AMDGPU::VCCRegBankID)
312 return false;
313
314 bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
317
318
320 true,
321 false,
322 true));
324}
325
326bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
329 Register DstReg = I.getOperand(0).getReg();
331 LLT Ty = MRI->getType(DstReg);
333 return false;
334
337 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
338 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
339
340 if (Size == 32) {
341 if (IsSALU) {
342 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
344 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
348 I.eraseFromParent();
350 }
351
353 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
354 I.setDesc(TII.get(Opc));
358 }
359
360 const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
361
364 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
369 I.eraseFromParent();
371 }
372
373 assert(!Sub && "illegal sub should not reach here");
374
376 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
378 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
379
380 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
381 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
382 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
383 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
384
385 Register DstLo = MRI->createVirtualRegister(&HalfRC);
386 Register DstHi = MRI->createVirtualRegister(&HalfRC);
387
388 if (IsSALU) {
389 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
390 .add(Lo1)
391 .add(Lo2);
392 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
393 .add(Hi1)
394 .add(Hi2)
396 } else {
398 Register CarryReg = MRI->createVirtualRegister(CarryRC);
399 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
401 .add(Lo1)
402 .add(Lo2)
406 .add(Hi1)
407 .add(Hi2)
410
412 return false;
413 }
414
415 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
417 .addImm(AMDGPU::sub0)
419 .addImm(AMDGPU::sub1);
420
421
423 return false;
424
425 I.eraseFromParent();
426 return true;
427}
428
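// Worked sketch of the 64-bit SALU path above: an s64 add is split through
// sub0/sub1 into
//   S_ADD_U32  dst.lo, a.lo, b.lo   // carry-out written to SCC
//   S_ADDC_U32 dst.hi, a.hi, b.hi   // carry-in consumed from SCC
// and rejoined with REG_SEQUENCE; the VALU path does the same with
// V_ADD_CO_U32_e64 / V_ADDC_U32_e64 through an explicit CarryReg lane mask.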
429bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
434 Register Dst0Reg = I.getOperand(0).getReg();
435 Register Dst1Reg = I.getOperand(1).getReg();
436 const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
437 I.getOpcode() == AMDGPU::G_UADDE;
438 const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
439 I.getOpcode() == AMDGPU::G_USUBE;
440
441 if (isVCC(Dst1Reg, *MRI)) {
442 unsigned NoCarryOpc =
443 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
444 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
445 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
449 }
450
451 Register Src0Reg = I.getOperand(2).getReg();
452 Register Src1Reg = I.getOperand(3).getReg();
453
454 if (HasCarryIn) {
455 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
456 .addReg(I.getOperand(4).getReg());
457 }
458
459 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
460 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
461
462 auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
465
466 if (MRI->use_nodbg_empty(Dst1Reg)) {
468 } else {
469 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
470 .addReg(AMDGPU::SCC);
471 if (!MRI->getRegClassOrNull(Dst1Reg))
472 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
473 }
474
478 return false;
479
480 if (HasCarryIn &&
482 AMDGPU::SReg_32RegClass, *MRI))
483 return false;
484
485 I.eraseFromParent();
486 return true;
487}
488
489bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
493 const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
494
495 unsigned Opc;
497 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
498 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
499 else
500 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
501 I.setDesc(TII.get(Opc));
503 I.addImplicitDefUseOperands(*MF);
505}
506
507
508bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
510 Register DstReg = I.getOperand(0).getReg();
511 Register SrcReg = I.getOperand(1).getReg();
512 LLT DstTy = MRI->getType(DstReg);
513 LLT SrcTy = MRI->getType(SrcReg);
516
517
518 unsigned Offset = I.getOperand(2).getImm();
519 if (Offset % 32 != 0 || DstSize > 128)
520 return false;
521
522
523
524 if (DstSize == 16)
525 DstSize = 32;
526
530 return false;
531
535 if (!SrcRC)
536 return false;
538 DstSize / 32);
539 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
540 if (!SrcRC)
541 return false;
542
544 *SrcRC, I.getOperand(1));
546 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
548
549 I.eraseFromParent();
550 return true;
551}
552
553bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
555 Register DstReg = MI.getOperand(0).getReg();
556 LLT DstTy = MRI->getType(DstReg);
557 LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
558
560 if (SrcSize < 32)
562
568 if (!DstRC)
569 return false;
570
573 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
574 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
578
582 return false;
583 }
584
586 return false;
587
588 MI.eraseFromParent();
589 return true;
590}
591
592bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
594 const int NumDst = MI.getNumOperands() - 1;
595
597
598 Register SrcReg = Src.getReg();
599 Register DstReg0 = MI.getOperand(0).getReg();
600 LLT DstTy = MRI->getType(DstReg0);
601 LLT SrcTy = MRI->getType(SrcReg);
602
607
611 return false;
612
613
614
615
617 for (int I = 0, E = NumDst; I != E; ++I) {
619 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
620 .addReg(SrcReg, 0, SubRegs[I]);
621
622
623 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
625 return false;
626
630 return false;
631 }
632
633 MI.eraseFromParent();
634 return true;
635}
636
637bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
638 assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
639 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
640
641 Register Src0 = MI.getOperand(1).getReg();
642 Register Src1 = MI.getOperand(2).getReg();
643 LLT SrcTy = MRI->getType(Src0);
645
646
647 if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
648 return selectG_MERGE_VALUES(MI);
649 }
650
651
652
653 Register Dst = MI.getOperand(0).getReg();
655 (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
658
660 if (DstBank->getID() == AMDGPU::AGPRRegBankID)
661 return false;
662
663 assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
664 DstBank->getID() == AMDGPU::VGPRRegBankID);
665 const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
666
669
670
671
672
674 if (ConstSrc1) {
675 auto ConstSrc0 =
677 if (ConstSrc0) {
678 const int64_t K0 = ConstSrc0->Value.getSExtValue();
679 const int64_t K1 = ConstSrc1->Value.getSExtValue();
683
684
685 if (IsVector) {
686 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm);
687 MI.eraseFromParent();
689 }
690
691
692 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm);
693 MI.eraseFromParent();
695 }
696 }
697
698
700 return true;
701
702
703
705 if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
706 MI.setDesc(TII.get(AMDGPU::COPY));
707 MI.removeOperand(2);
708 const auto &RC =
709 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
712 }
713
714
715 if (IsVector) {
716 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
717 auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
721 return false;
722
723 MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
728 return false;
729
730 MI.eraseFromParent();
731 return true;
732 }
733
736
737
738
739
740
741
742
743
744
745
746
747
748
751
754
755 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
756 if (Shift0 && Shift1) {
757 Opc = AMDGPU::S_PACK_HH_B32_B16;
758 MI.getOperand(1).setReg(ShiftSrc0);
759 MI.getOperand(2).setReg(ShiftSrc1);
760 } else if (Shift1) {
761 Opc = AMDGPU::S_PACK_LH_B32_B16;
762 MI.getOperand(2).setReg(ShiftSrc1);
763 } else if (Shift0) {
764 auto ConstSrc1 =
766 if (ConstSrc1 && ConstSrc1->Value == 0) {
767
768 auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
772
773 MI.eraseFromParent();
775 }
777 Opc = AMDGPU::S_PACK_HL_B32_B16;
778 MI.getOperand(1).setReg(ShiftSrc0);
779 }
780 }
781
782 MI.setDesc(TII.get(Opc));
784}
785
786bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
788
789
790
792 if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
794 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
795 return true;
796 }
797
798 return false;
799}
800
801bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
803
804 Register DstReg = I.getOperand(0).getReg();
805 Register Src0Reg = I.getOperand(1).getReg();
806 Register Src1Reg = I.getOperand(2).getReg();
807 LLT Src1Ty = MRI->getType(Src1Reg);
808
809 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
811
812 int64_t Offset = I.getOperand(3).getImm();
813
814
815 if (Offset % 32 != 0 || InsSize % 32 != 0)
816 return false;
817
818
819 if (InsSize > 128)
820 return false;
821
823 if (SubReg == AMDGPU::NoSubRegister)
824 return false;
825
829 if (!DstRC)
830 return false;
831
838
839
840
841 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
842 if (!Src0RC || !Src1RC)
843 return false;
844
848 return false;
849
851 BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
855
856 I.eraseFromParent();
857 return true;
858}
859
860bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
861 Register DstReg = MI.getOperand(0).getReg();
862 Register SrcReg = MI.getOperand(1).getReg();
863 Register OffsetReg = MI.getOperand(2).getReg();
864 Register WidthReg = MI.getOperand(3).getReg();
865
867 "scalar BFX instructions are expanded in regbankselect");
868 assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
869 "64-bit vector BFX instructions are expanded in regbankselect");
870
873
874 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
875 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
876 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg)
880 MI.eraseFromParent();
882}
883
884bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
887
888 Register Dst = MI.getOperand(0).getReg();
889 Register Src0 = MI.getOperand(2).getReg();
890 Register M0Val = MI.getOperand(6).getReg();
894 return false;
895
896
897
898
899
900
901
902
903 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
906
907 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
909 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
911 .addImm(MI.getOperand(4).getImm())
912 .addImm(MI.getOperand(3).getImm());
913
914 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst)
915 .addImm(0)
916 .addReg(Src0)
917 .addImm(MI.getOperand(4).getImm())
918 .addImm(MI.getOperand(3).getImm())
919 .addImm(0)
920 .addReg(InterpMov)
921 .addImm(MI.getOperand(5).getImm())
924
925 MI.eraseFromParent();
926 return true;
927}
928
929
930
931
932
933
934bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
935
938
941 Register VDst = MI.getOperand(0).getReg();
942 Register Val = MI.getOperand(2).getReg();
943 Register LaneSelect = MI.getOperand(3).getReg();
944 Register VDstIn = MI.getOperand(4).getReg();
945
946 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
947
948 std::optional<ValueAndVReg> ConstSelect =
950 if (ConstSelect) {
951
952
954 MIB.addImm(ConstSelect->Value.getSExtValue() &
956 } else {
957 std::optional<ValueAndVReg> ConstVal =
959
960
961
964 MIB.addImm(ConstVal->Value.getSExtValue());
965 MIB.addReg(LaneSelect);
966 } else {
968
969
970
971
973
974 BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
976 MIB.addReg(AMDGPU::M0);
977 }
978 }
979
981
982 MI.eraseFromParent();
984}
985
986
987
988bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
989 Register Dst0 = MI.getOperand(0).getReg();
990 Register Dst1 = MI.getOperand(1).getReg();
991
992 LLT Ty = MRI->getType(Dst0);
993 unsigned Opc;
995 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
997 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
998 else
999 return false;
1000
1001
1002
1005
1006 Register Numer = MI.getOperand(3).getReg();
1007 Register Denom = MI.getOperand(4).getReg();
1008 unsigned ChooseDenom = MI.getOperand(5).getImm();
1009
1010 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1011
1012 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
1014 .addImm(0)
1015 .addUse(Src0)
1016 .addImm(0)
1017 .addUse(Denom)
1018 .addImm(0)
1019 .addUse(Numer)
1020 .addImm(0)
1021 .addImm(0);
1022
1023 MI.eraseFromParent();
1025}
1026
1027bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
1028 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
1029 switch (IntrinsicID) {
1030 case Intrinsic::amdgcn_if_break: {
1032
1033
1034
1035 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1039
1040 Register DstReg = I.getOperand(0).getReg();
1041 Register Src0Reg = I.getOperand(2).getReg();
1042 Register Src1Reg = I.getOperand(3).getReg();
1043
1044 I.eraseFromParent();
1045
1046 for (Register Reg : { DstReg, Src0Reg, Src1Reg })
1048
1049 return true;
1050 }
1051 case Intrinsic::amdgcn_interp_p1_f16:
1052 return selectInterpP1F16(I);
1053 case Intrinsic::amdgcn_wqm:
1054 return constrainCopyLikeIntrin(I, AMDGPU::WQM);
1055 case Intrinsic::amdgcn_softwqm:
1056 return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
1057 case Intrinsic::amdgcn_strict_wwm:
1058 case Intrinsic::amdgcn_wwm:
1059 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
1060 case Intrinsic::amdgcn_strict_wqm:
1061 return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
1062 case Intrinsic::amdgcn_writelane:
1063 return selectWritelane(I);
1064 case Intrinsic::amdgcn_div_scale:
1065 return selectDivScale(I);
1066 case Intrinsic::amdgcn_icmp:
1067 case Intrinsic::amdgcn_fcmp:
1069 return true;
1070 return selectIntrinsicCmp(I);
1071 case Intrinsic::amdgcn_ballot:
1072 return selectBallot(I);
1073 case Intrinsic::amdgcn_reloc_constant:
1074 return selectRelocConstant(I);
1075 case Intrinsic::amdgcn_groupstaticsize:
1076 return selectGroupStaticSize(I);
1077 case Intrinsic::returnaddress:
1078 return selectReturnAddress(I);
1079 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1080 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1081 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1082 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1083 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1084 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1085 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1086 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1087 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1088 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1089 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1090 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1091 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1092 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1093 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1094 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1095 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1096 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1097 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1098 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1099 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1100 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1101 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1102 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1103 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1104 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1105 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1106 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1107 return selectSMFMACIntrin(I);
1108 case Intrinsic::amdgcn_permlane16_swap:
1109 case Intrinsic::amdgcn_permlane32_swap:
1110 return selectPermlaneSwapIntrin(I, IntrinsicID);
1111 default:
1113 }
1114}
1115
1119 return -1;
1120
1121 if (Size == 16 && !ST.has16BitInsts())
1122 return -1;
1123
1124 const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
1125 unsigned FakeS16Opc, unsigned S32Opc,
1126 unsigned S64Opc) {
1127 if (Size == 16)
1128
1129 return ST.hasTrue16BitInsts()
1130 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1131 : S16Opc;
1132 if (Size == 32)
1133 return S32Opc;
1134 return S64Opc;
1135 };
1136
1137 switch (P) {
1138 default:
1141 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1142 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1143 AMDGPU::V_CMP_NE_U64_e64);
1145 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1146 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1147 AMDGPU::V_CMP_EQ_U64_e64);
1149 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1150 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1151 AMDGPU::V_CMP_GT_I64_e64);
1153 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1154 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1155 AMDGPU::V_CMP_GE_I64_e64);
1157 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1158 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1159 AMDGPU::V_CMP_LT_I64_e64);
1161 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1162 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1163 AMDGPU::V_CMP_LE_I64_e64);
1165 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1166 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1167 AMDGPU::V_CMP_GT_U64_e64);
1169 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1170 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1171 AMDGPU::V_CMP_GE_U64_e64);
1173 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1174 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1175 AMDGPU::V_CMP_LT_U64_e64);
1177 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1178 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1179 AMDGPU::V_CMP_LE_U64_e64);
1180
1182 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1183 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1184 AMDGPU::V_CMP_EQ_F64_e64);
1186 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1187 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1188 AMDGPU::V_CMP_GT_F64_e64);
1190 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1191 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1192 AMDGPU::V_CMP_GE_F64_e64);
1194 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1195 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1196 AMDGPU::V_CMP_LT_F64_e64);
1198 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1199 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1200 AMDGPU::V_CMP_LE_F64_e64);
1202 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1203 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1204 AMDGPU::V_CMP_NEQ_F64_e64);
1206 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1207 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1208 AMDGPU::V_CMP_O_F64_e64);
1210 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1211 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1212 AMDGPU::V_CMP_U_F64_e64);
1214 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1215 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1216 AMDGPU::V_CMP_NLG_F64_e64);
1218 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1219 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1220 AMDGPU::V_CMP_NLE_F64_e64);
1222 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1223 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1224 AMDGPU::V_CMP_NLT_F64_e64);
1226 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1227 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1228 AMDGPU::V_CMP_NGE_F64_e64);
1230 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1231 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1232 AMDGPU::V_CMP_NGT_F64_e64);
1234 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1235 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1236 AMDGPU::V_CMP_NEQ_F64_e64);
1238 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1239 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1240 AMDGPU::V_CMP_TRU_F64_e64);
1242 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1243 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1244 AMDGPU::V_CMP_F_F64_e64);
1245 }
1246}
1247
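// Example of the Select() helper above for an unsigned 16-bit "not equal":
// targets with real true16 instructions get V_CMP_NE_U16_t16_e64, targets
// with only fake16 get V_CMP_NE_U16_fake16_e64, older subtargets fall back
// to V_CMP_NE_U16_e64, and 32/64-bit compares take the S32Opc/S64Opc arms.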
1249 unsigned Size) const {
1250 if (Size == 64) {
1252 return -1;
1253
1254 switch (P) {
1256 return AMDGPU::S_CMP_LG_U64;
1258 return AMDGPU::S_CMP_EQ_U64;
1259 default:
1260 return -1;
1261 }
1262 }
1263
1264 if (Size == 32) {
1265 switch (P) {
1267 return AMDGPU::S_CMP_LG_U32;
1269 return AMDGPU::S_CMP_EQ_U32;
1271 return AMDGPU::S_CMP_GT_I32;
1273 return AMDGPU::S_CMP_GE_I32;
1275 return AMDGPU::S_CMP_LT_I32;
1277 return AMDGPU::S_CMP_LE_I32;
1279 return AMDGPU::S_CMP_GT_U32;
1281 return AMDGPU::S_CMP_GE_U32;
1283 return AMDGPU::S_CMP_LT_U32;
1285 return AMDGPU::S_CMP_LE_U32;
1287 return AMDGPU::S_CMP_EQ_F32;
1289 return AMDGPU::S_CMP_GT_F32;
1291 return AMDGPU::S_CMP_GE_F32;
1293 return AMDGPU::S_CMP_LT_F32;
1295 return AMDGPU::S_CMP_LE_F32;
1297 return AMDGPU::S_CMP_LG_F32;
1299 return AMDGPU::S_CMP_O_F32;
1301 return AMDGPU::S_CMP_U_F32;
1303 return AMDGPU::S_CMP_NLG_F32;
1305 return AMDGPU::S_CMP_NLE_F32;
1307 return AMDGPU::S_CMP_NLT_F32;
1309 return AMDGPU::S_CMP_NGE_F32;
1311 return AMDGPU::S_CMP_NGT_F32;
1313 return AMDGPU::S_CMP_NEQ_F32;
1314 default:
1316 }
1317 }
1318
1319 if (Size == 16) {
1321 return -1;
1322
1323 switch (P) {
1325 return AMDGPU::S_CMP_EQ_F16;
1327 return AMDGPU::S_CMP_GT_F16;
1329 return AMDGPU::S_CMP_GE_F16;
1331 return AMDGPU::S_CMP_LT_F16;
1333 return AMDGPU::S_CMP_LE_F16;
1335 return AMDGPU::S_CMP_LG_F16;
1337 return AMDGPU::S_CMP_O_F16;
1339 return AMDGPU::S_CMP_U_F16;
1341 return AMDGPU::S_CMP_NLG_F16;
1343 return AMDGPU::S_CMP_NLE_F16;
1345 return AMDGPU::S_CMP_NLT_F16;
1347 return AMDGPU::S_CMP_NGE_F16;
1349 return AMDGPU::S_CMP_NGT_F16;
1351 return AMDGPU::S_CMP_NEQ_F16;
1352 default:
1354 }
1355 }
1356
1357 return -1;
1358}
1359
1360bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
1361
1364
1365 Register SrcReg = I.getOperand(2).getReg();
1367
1369
1370 Register CCReg = I.getOperand(0).getReg();
1371 if (!isVCC(CCReg, *MRI)) {
1372 int Opcode = getS_CMPOpcode(Pred, Size);
1373 if (Opcode == -1)
1374 return false;
1378 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
1379 .addReg(AMDGPU::SCC);
1380 bool Ret =
1383 I.eraseFromParent();
1384 return Ret;
1385 }
1386
1387 if (I.getOpcode() == AMDGPU::G_FCMP)
1388 return false;
1389
1391 if (Opcode == -1)
1392 return false;
1393
1395 I.getOperand(0).getReg())
1401 I.eraseFromParent();
1402 return Ret;
1403}
1404
1405bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
1406 Register Dst = I.getOperand(0).getReg();
1407 if (isVCC(Dst, *MRI))
1408 return false;
1409
1410 LLT DstTy = MRI->getType(Dst);
1412 return false;
1413
1416 Register SrcReg = I.getOperand(2).getReg();
1418
1419
1420 if (Size == 1)
1421 return false;
1422
1425 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1426 I.eraseFromParent();
1428 }
1429
1431 if (Opcode == -1)
1432 return false;
1433
1437 auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
1438 auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
1440 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
1442 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
1443 SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
1445 SelectedMI.addImm(Src0Mods);
1446 SelectedMI.addReg(Src0Reg);
1448 SelectedMI.addImm(Src1Mods);
1449 SelectedMI.addReg(Src1Reg);
1451 SelectedMI.addImm(0);
1453 SelectedMI.addImm(0);
1454
1457 return false;
1458
1459 I.eraseFromParent();
1460 return true;
1461}
1462
1463
1464
1465
1466
1470 if (MI->getParent() != MBB)
1471 return false;
1472
1473
1474 if (MI->getOpcode() == AMDGPU::COPY) {
1475 auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
1476 auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
1477 if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
1478 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1479 return true;
1480 }
1481
1482
1483 if (isa<GAnyCmp>(MI))
1484 return true;
1485
1487
1491
1492 return false;
1493}
1494
1495bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
1498 Register DstReg = I.getOperand(0).getReg();
1499 Register SrcReg = I.getOperand(2).getReg();
1500 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1502
1503
1504
1505 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1506 return false;
1507
1508 std::optional<ValueAndVReg> Arg =
1510
1512
1513 if (BallotSize != WaveSize) {
1514 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1515 }
1516
1517 if (Arg) {
1518 const int64_t Value = Arg->Value.getZExtValue();
1519 if (Value == 0) {
1520
1521 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1523 } else {
1524
1527 }
1529 return false;
1530 } else {
1532
1533 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst).addReg(SrcReg);
1535 return false;
1536 } else {
1537
1538 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1539 auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), Dst)
1544 return false;
1545 }
1546 }
1547
1548
1549 if (BallotSize != WaveSize) {
1550 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1551 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);
1552 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1554 .addImm(AMDGPU::sub0)
1556 .addImm(AMDGPU::sub1);
1557 }
1558
1559 I.eraseFromParent();
1560 return true;
1561}
1562
1563bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
1564 Register DstReg = I.getOperand(0).getReg();
1568 return false;
1569
1570 const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
1571
1574 auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
1575 auto *RelocSymbol = cast<GlobalVariable>(
1576 M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
1577
1579 BuildMI(*BB, &I, I.getDebugLoc(),
1580 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1582
1583 I.eraseFromParent();
1584 return true;
1585}
1586
1587bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
1589
1590 Register DstReg = I.getOperand(0).getReg();
1592 unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
1593 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1594
1597
1598 auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);
1599
1603 } else {
1608 }
1609
1610 I.eraseFromParent();
1612}
1613
1614bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
1618
1620 Register DstReg = Dst.getReg();
1621 unsigned Depth = I.getOperand(2).getImm();
1622
1625 if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
1627 return false;
1628
1629
1630 if (Depth != 0 ||
1632 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
1634 I.eraseFromParent();
1635 return true;
1636 }
1637
1639
1641
1642
1645 AMDGPU::SReg_64RegClass, DL);
1646 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
1648 I.eraseFromParent();
1649 return true;
1650}
1651
1652bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
1653
1654
1656 BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1658
1660 MI.eraseFromParent();
1661
1662 if (!MRI->getRegClassOrNull(Reg))
1664 return true;
1665}
1666
1667bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1672
1673 unsigned IndexOperand = MI.getOperand(7).getImm();
1674 bool WaveRelease = MI.getOperand(8).getImm() != 0;
1675 bool WaveDone = MI.getOperand(9).getImm() != 0;
1676
1677 if (WaveDone && !WaveRelease)
1678 report_fatal_error("ds_ordered_count: wave_done requires wave_release");
1679
1680 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1681 IndexOperand &= ~0x3f;
1682 unsigned CountDw = 0;
1683
1685 CountDw = (IndexOperand >> 24) & 0xf;
1686 IndexOperand &= ~(0xf << 24);
1687
1688 if (CountDw < 1 || CountDw > 4) {
1690 "ds_ordered_count: dword count must be between 1 and 4");
1691 }
1692 }
1693
1694 if (IndexOperand)
1696
1697 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1699
1700 unsigned Offset0 = OrderedCountIndex << 2;
1701 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1702
1704 Offset1 |= (CountDw - 1) << 6;
1705
1707 Offset1 |= ShaderType << 2;
1708
1709 unsigned Offset = Offset0 | (Offset1 << 8);
1710
1711 Register M0Val = MI.getOperand(2).getReg();
1712 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1714
1715 Register DstReg = MI.getOperand(0).getReg();
1716 Register ValReg = MI.getOperand(3).getReg();
1718 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1722
1724 return false;
1725
1727 MI.eraseFromParent();
1728 return Ret;
1729}
1730
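// Worked example of the offset packing above, assuming amdgcn_ds_ordered_add
// (Instruction = 0) with OrderedCountIndex = 1, WaveRelease = 1, WaveDone = 0:
//   Offset0 = 1 << 2                     = 0x4
//   Offset1 = 1 | (0 << 1) | (0 << 4)    = 0x1  (plus CountDw/ShaderType bits)
//   Offset  = Offset0 | (Offset1 << 8)   = 0x104
// CountDw - 1 lands in bits [7:6] of Offset1 and the shader type in [3:2].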
1732 switch (IntrID) {
1733 case Intrinsic::amdgcn_ds_gws_init:
1734 return AMDGPU::DS_GWS_INIT;
1735 case Intrinsic::amdgcn_ds_gws_barrier:
1736 return AMDGPU::DS_GWS_BARRIER;
1737 case Intrinsic::amdgcn_ds_gws_sema_v:
1738 return AMDGPU::DS_GWS_SEMA_V;
1739 case Intrinsic::amdgcn_ds_gws_sema_br:
1740 return AMDGPU::DS_GWS_SEMA_BR;
1741 case Intrinsic::amdgcn_ds_gws_sema_p:
1742 return AMDGPU::DS_GWS_SEMA_P;
1743 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1744 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1745 default:
1747 }
1748}
1749
1750bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
1752 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1754 return false;
1755
1756
1757 const bool HasVSrc = MI.getNumOperands() == 3;
1758 assert(HasVSrc || MI.getNumOperands() == 2);
1759
1760 Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
1762 if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
1763 return false;
1764
1766 unsigned ImmOffset;
1767
1770
1772
1773
1774
1775
1776 if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1777 Readfirstlane = OffsetDef;
1780 }
1781
1782 if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
1783
1784
1785
1786
1787
1789 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
1791 } else {
1792 std::tie(BaseOffset, ImmOffset) =
1794
1795 if (Readfirstlane) {
1796
1797
1799 return false;
1800
1803 } else {
1805 AMDGPU::SReg_32RegClass, *MRI))
1806 return false;
1807 }
1808
1809 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1810 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
1814
1815 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1817 }
1818
1819
1820
1821
1823
1824 if (HasVSrc) {
1825 Register VSrc = MI.getOperand(1).getReg();
1827
1829 return false;
1830 }
1831
1832 MIB.addImm(ImmOffset)
1834
1836
1837 MI.eraseFromParent();
1838 return true;
1839}
1840
1841bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
1842 bool IsAppend) const {
1843 Register PtrBase = MI.getOperand(2).getReg();
1844 LLT PtrTy = MRI->getType(PtrBase);
1846
1848 std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
1849
1850
1851 if (!isDSOffsetLegal(PtrBase, Offset)) {
1852 PtrBase = MI.getOperand(2).getReg();
1854 }
1855
1858 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1859
1860 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1863 return false;
1864
1865 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
1867 .addImm(IsGDS ? -1 : 0)
1869 MI.eraseFromParent();
1871}
1872
1873bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
1876
1879}
1880
1881bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
1882 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1886
1887
1888 if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
1889 IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
1892 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
1893 }
1894 MI.eraseFromParent();
1895 return true;
1896 }
1897 }
1898
1899 if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
1900
1903 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
1905 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
1907 MI.eraseFromParent();
1908 return true;
1909 }
1910
1912}
1913
1915 bool &IsTexFail) {
1916 if (TexFailCtrl)
1917 IsTexFail = true;
1918
1919 TFE = (TexFailCtrl & 0x1) ? true : false;
1920 TexFailCtrl &= ~(uint64_t)0x1;
1921 LWE = (TexFailCtrl & 0x2) ? true : false;
1922 TexFailCtrl &= ~(uint64_t)0x2;
1923
1924 return TexFailCtrl == 0;
1925}
1926
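// Example of the TexFailCtrl decoding above: an immediate of 3 sets both TFE
// and LWE, marks IsTexFail, and returns true (no unknown bits left); an
// immediate of 4 leaves an unsupported bit set, so the function returns false
// and selectImageIntrinsic rejects the instruction.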
1927bool AMDGPUInstructionSelector::selectImageIntrinsic(
1931
1934
1936 unsigned IntrOpcode = Intr->BaseOpcode;
1940
1941 const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
1942
1944 LLT VDataTy;
1945 int NumVDataDwords = -1;
1946 bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
1947 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
1948
1949 bool Unorm;
1950 if (!BaseOpcode->Sampler)
1951 Unorm = true;
1952 else
1953 Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
1954
1955 bool TFE;
1956 bool LWE;
1957 bool IsTexFail = false;
1958 if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
1959 TFE, LWE, IsTexFail))
1960 return false;
1961
1962 const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
1963 const bool IsA16 = (Flags & 1) != 0;
1964 const bool IsG16 = (Flags & 2) != 0;
1965
1966
1967 if (IsA16 && !STI.hasG16() && !IsG16)
1968 return false;
1969
1970 unsigned DMask = 0;
1971 unsigned DMaskLanes = 0;
1972
1973 if (BaseOpcode->Atomic) {
1974 VDataOut = MI.getOperand(0).getReg();
1975 VDataIn = MI.getOperand(2).getReg();
1976 LLT Ty = MRI->getType(VDataIn);
1977
1978
1979 const bool Is64Bit = BaseOpcode->AtomicX2 ?
1982
1984 assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
1985
1986 DMask = Is64Bit ? 0xf : 0x3;
1987 NumVDataDwords = Is64Bit ? 4 : 2;
1988 } else {
1989 DMask = Is64Bit ? 0x3 : 0x1;
1990 NumVDataDwords = Is64Bit ? 2 : 1;
1991 }
1992 } else {
1993 DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
1995
1996 if (BaseOpcode->Store) {
1997 VDataIn = MI.getOperand(1).getReg();
1998 VDataTy = MRI->getType(VDataIn);
1999 NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
2000 } else if (BaseOpcode->NoReturn) {
2001 NumVDataDwords = 0;
2002 } else {
2003 VDataOut = MI.getOperand(0).getReg();
2004 VDataTy = MRI->getType(VDataOut);
2005 NumVDataDwords = DMaskLanes;
2006
2008 NumVDataDwords = (DMaskLanes + 1) / 2;
2009 }
2010 }
2011
2012
2013 if (Subtarget->hasG16() && IsG16) {
2016 assert(G16MappingInfo);
2017 IntrOpcode = G16MappingInfo->G16;
2018 }
2019
2020
2021 assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
2022
2023 unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
2024 if (BaseOpcode->Atomic)
2028 return false;
2029
2030 int NumVAddrRegs = 0;
2031 int NumVAddrDwords = 0;
2032 for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
2033
2035 if (!AddrOp.isReg())
2036 continue;
2037
2040 break;
2041
2042 ++NumVAddrRegs;
2043 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2044 }
2045
2046
2047
2048
2049 const bool UseNSA =
2050 NumVAddrRegs != 1 &&
2052 : NumVAddrDwords == NumVAddrRegs);
2053 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2054 LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
2055 return false;
2056 }
2057
2058 if (IsTexFail)
2059 ++NumVDataDwords;
2060
2061 int Opcode = -1;
2062 if (IsGFX12Plus) {
2064 NumVDataDwords, NumVAddrDwords);
2065 } else if (IsGFX11Plus) {
2067 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2068 : AMDGPU::MIMGEncGfx11Default,
2069 NumVDataDwords, NumVAddrDwords);
2070 } else if (IsGFX10Plus) {
2072 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2073 : AMDGPU::MIMGEncGfx10Default,
2074 NumVDataDwords, NumVAddrDwords);
2075 } else {
2078 NumVDataDwords, NumVAddrDwords);
2079 if (Opcode == -1) {
2082 << "requested image instruction is not supported on this GPU\n");
2083 return false;
2084 }
2085 }
2086 if (Opcode == -1 &&
2089 NumVDataDwords, NumVAddrDwords);
2090 if (Opcode == -1)
2092 NumVDataDwords, NumVAddrDwords);
2093 }
2094 if (Opcode == -1)
2095 return false;
2096
2099
2100 if (VDataOut) {
2102 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2103
2104 Register TmpReg = MRI->createVirtualRegister(
2105 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2106 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2107
2109 if (!MRI->use_empty(VDataOut)) {
2110 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
2112 }
2113
2114 } else {
2115 MIB.addDef(VDataOut);
2116 }
2117 }
2118
2119 if (VDataIn)
2120 MIB.addReg(VDataIn);
2121
2122 for (int I = 0; I != NumVAddrRegs; ++I) {
2124 if (SrcOp.isReg()) {
2127 }
2128 }
2129
2130 MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
2131 if (BaseOpcode->Sampler)
2132 MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
2133
2134 MIB.addImm(DMask);
2135
2136 if (IsGFX10Plus)
2140
2142 MIB.addImm(IsA16 &&
2143 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2144 if (IsGFX10Plus)
2145 MIB.addImm(IsA16 ? -1 : 0);
2146
2148 MIB.addImm(TFE);
2149 } else if (TFE) {
2150 LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");
2151 return false;
2152 }
2153
2155 MIB.addImm(LWE);
2156 if (!IsGFX10Plus)
2157 MIB.addImm(DimInfo->DA ? -1 : 0);
2158 if (BaseOpcode->HasD16)
2159 MIB.addImm(IsD16 ? -1 : 0);
2160
2161 MI.eraseFromParent();
2164 return true;
2165}
2166
2167
2168
2169bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2171 Register Dst0 = MI.getOperand(0).getReg();
2172 Register Dst1 = MI.getOperand(1).getReg();
2173
2176
2178 Register Data0 = MI.getOperand(4).getReg();
2179 Register Data1 = MI.getOperand(5).getReg();
2180 unsigned Offset = MI.getOperand(6).getImm();
2181
2182 auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2189
2190 MI.eraseFromParent();
2192}
2193
2194bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2196 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
2197 switch (IntrinsicID) {
2198 case Intrinsic::amdgcn_end_cf:
2199 return selectEndCfIntrinsic(I);
2200 case Intrinsic::amdgcn_ds_ordered_add:
2201 case Intrinsic::amdgcn_ds_ordered_swap:
2202 return selectDSOrderedIntrinsic(I, IntrinsicID);
2203 case Intrinsic::amdgcn_ds_gws_init:
2204 case Intrinsic::amdgcn_ds_gws_barrier:
2205 case Intrinsic::amdgcn_ds_gws_sema_v:
2206 case Intrinsic::amdgcn_ds_gws_sema_br:
2207 case Intrinsic::amdgcn_ds_gws_sema_p:
2208 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2209 return selectDSGWSIntrinsic(I, IntrinsicID);
2210 case Intrinsic::amdgcn_ds_append:
2211 return selectDSAppendConsume(I, true);
2212 case Intrinsic::amdgcn_ds_consume:
2213 return selectDSAppendConsume(I, false);
2214 case Intrinsic::amdgcn_init_whole_wave:
2215 return selectInitWholeWave(I);
2216 case Intrinsic::amdgcn_s_barrier:
2217 case Intrinsic::amdgcn_s_barrier_signal:
2218 case Intrinsic::amdgcn_s_barrier_wait:
2219 return selectSBarrier(I);
2220 case Intrinsic::amdgcn_raw_buffer_load_lds:
2221 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2222 case Intrinsic::amdgcn_struct_buffer_load_lds:
2223 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2224 return selectBufferLoadLds(I);
2225 case Intrinsic::amdgcn_global_load_lds:
2226 return selectGlobalLoadLds(I);
2227 case Intrinsic::amdgcn_exp_compr:
2229 Function &F = I.getMF()->getFunction();
2231 F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
2232 F.getContext().diagnose(NoFpRet);
2233 return false;
2234 }
2235 break;
2236 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2237 return selectDSBvhStackIntrinsic(I);
2238 case Intrinsic::amdgcn_s_barrier_init:
2239 case Intrinsic::amdgcn_s_barrier_signal_var:
2240 return selectNamedBarrierInit(I, IntrinsicID);
2241 case Intrinsic::amdgcn_s_barrier_join:
2242 case Intrinsic::amdgcn_s_get_named_barrier_state:
2243 return selectNamedBarrierInst(I, IntrinsicID);
2244 case Intrinsic::amdgcn_s_get_barrier_state:
2245 return selectSGetBarrierState(I, IntrinsicID);
2246 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2247 return selectSBarrierSignalIsfirst(I, IntrinsicID);
2248 }
2250}
2251
2252bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
2254 return true;
2255
2258
2259 Register DstReg = I.getOperand(0).getReg();
2264 if (!isVCC(CCReg, *MRI)) {
2265 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
2266 AMDGPU::S_CSELECT_B32;
2269
2270
2271
2272
2273 if (!MRI->getRegClassOrNull(CCReg))
2278
2279 bool Ret = false;
2282 I.eraseFromParent();
2283 return Ret;
2284 }
2285
2286
2287 if (Size > 32)
2288 return false;
2289
2291 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2297
2299 I.eraseFromParent();
2300 return Ret;
2301}
2302
2303bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
2304 Register DstReg = I.getOperand(0).getReg();
2305 Register SrcReg = I.getOperand(1).getReg();
2306 const LLT DstTy = MRI->getType(DstReg);
2307 const LLT SrcTy = MRI->getType(SrcReg);
2309
2312 if (DstTy == S1) {
2313
2314
2315 DstRB = SrcRB;
2316 } else {
2317 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2318 if (SrcRB != DstRB)
2319 return false;
2320 }
2321
2322 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
2323
2326
2331 if (!SrcRC || !DstRC)
2332 return false;
2333
2336 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
2337 return false;
2338 }
2339
2340 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2344 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
2345 .addReg(SrcReg, 0, AMDGPU::lo16);
2346 I.eraseFromParent();
2347 return true;
2348 }
2349
2353
2354 Register LoReg = MRI->createVirtualRegister(DstRC);
2355 Register HiReg = MRI->createVirtualRegister(DstRC);
2356 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
2357 .addReg(SrcReg, 0, AMDGPU::sub0);
2358 BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
2359 .addReg(SrcReg, 0, AMDGPU::sub1);
2360
2361 if (IsVALU && STI.hasSDWA()) {
2362
2363
2365 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2366 .addImm(0)
2367 .addReg(HiReg)
2368 .addImm(0)
2374 } else {
2375 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2376 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2377 Register ImmReg = MRI->createVirtualRegister(DstRC);
2378 if (IsVALU) {
2379 BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2382 } else {
2383 BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
2387 }
2388
2389 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2390 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2391 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2392
2401
2402 if (!IsVALU) {
2403 And.setOperandDead(3);
2404 Or.setOperandDead(3);
2405 }
2406 }
2407
2408 I.eraseFromParent();
2409 return true;
2410 }
2411
2413 return false;
2414
2415 if (SrcSize > 32) {
2416 unsigned SubRegIdx =
2418 if (SubRegIdx == AMDGPU::NoSubRegister)
2419 return false;
2420
2421
2422
2424 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2425 if (!SrcWithSubRC)
2426 return false;
2427
2428 if (SrcWithSubRC != SrcRC) {
2430 return false;
2431 }
2432
2433 I.getOperand(1).setSubReg(SubRegIdx);
2434 }
2435
2436 I.setDesc(TII.get(TargetOpcode::COPY));
2437 return true;
2438}
2439
2440
2442 Mask = maskTrailingOnes<unsigned>(Size);
2443 int SignedMask = static_cast<int>(Mask);
2444 return SignedMask >= -16 && SignedMask <= 64;
2445}
2446
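// Worked examples for the mask check above: Size = 4 gives Mask = 0xf (15),
// which fits the inline-constant range [-16, 64]; Size = 8 gives 0xff (255)
// and fails; Size = 32 gives 0xffffffff, which reinterprets as -1 and passes
// again, so a full-width AND can still use an inline immediate.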
2447
2448const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2452 if (auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
2453 return RB;
2454
2455
2456 if (auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
2458 return nullptr;
2459}
2460
2461bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
2462 bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
2463 bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
2466 const Register DstReg = I.getOperand(0).getReg();
2467 const Register SrcReg = I.getOperand(1).getReg();
2468
2469 const LLT DstTy = MRI->getType(DstReg);
2470 const LLT SrcTy = MRI->getType(SrcReg);
2471 const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2475 return false;
2476
2477
2478 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2479
2480
2481 if (I.getOpcode() == AMDGPU::G_ANYEXT) {
2482 if (DstSize <= 32)
2483 return selectCOPY(I);
2484
2486 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2489 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2490
2491 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2492 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2493 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
2495 .addImm(AMDGPU::sub0)
2497 .addImm(AMDGPU::sub1);
2498 I.eraseFromParent();
2499
2502 }
2503
2504 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2505
2506
2507
2508 unsigned Mask;
2511 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
2514 I.eraseFromParent();
2516 }
2517
2518 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2522 .addImm(0)
2523 .addImm(SrcSize);
2524 I.eraseFromParent();
2526 }
2527
2528 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2530 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2532 return false;
2533
2534 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2535 const unsigned SextOpc = SrcSize == 8 ?
2536 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2539 I.eraseFromParent();
2541 }
2542
2543
2544
2545 if (DstSize > 32 && SrcSize == 32) {
2546 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2547 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2549 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)
2553 } else {
2554 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
2556 }
2557 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
2559 .addImm(AMDGPU::sub0)
2561 .addImm(AMDGPU::sub1);
2562 I.eraseFromParent();
2564 *MRI);
2565 }
2566
2567 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2568 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2569
2570
2571 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2572
2573 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2574 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2575 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2576
2577 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2578 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
2580 .addImm(AMDGPU::sub0)
2582 .addImm(AMDGPU::sub1);
2583
2586 .addImm(SrcSize << 16);
2587
2588 I.eraseFromParent();
2590 }
2591
2592 unsigned Mask;
2594 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
2598 } else {
2601 .addImm(SrcSize << 16);
2602 }
2603
2604 I.eraseFromParent();
2606 }
2607
2608 return false;
2609}
2610
2613}
2614
2618 Reg = BitcastSrc;
2619 return Reg;
2620}
2621
2626 return false;
2627
2634 return true;
2635 }
2636 }
2637
2639 if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2640 return false;
2641
2644
2646 assert(Mask.size() == 2);
2647
2648 if (Mask[0] == 1 && Mask[1] <= 1) {
2650 return true;
2651 }
2652
2653 return false;
2654}
2655
2656bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
2658 return false;
2659
2660 Register Dst = I.getOperand(0).getReg();
2662 if (DstRB->getID() != AMDGPU::SGPRRegBankID)
2663 return false;
2664
2665 Register Src = I.getOperand(1).getReg();
2666
2671 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2673 I.eraseFromParent();
2675 }
2676 }
2677
2678 return false;
2679}
2680
2681bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693 Register Dst = MI.getOperand(0).getReg();
2695 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
2697 return false;
2698
2699 Register Src = MI.getOperand(1).getReg();
2701 if (Fabs)
2703
2706 return false;
2707
2710 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2711 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2712 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2713 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2714
2715 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
2716 .addReg(Src, 0, AMDGPU::sub0);
2717 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
2718 .addReg(Src, 0, AMDGPU::sub1);
2719 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2720 .addImm(0x80000000);
2721
2722
2723 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2724 BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
2728 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2730 .addImm(AMDGPU::sub0)
2732 .addImm(AMDGPU::sub1);
2733 MI.eraseFromParent();
2734 return true;
2735}
2736
2737
2738bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
2739 Register Dst = MI.getOperand(0).getReg();
2741 if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
2743 return false;
2744
2745 Register Src = MI.getOperand(1).getReg();
2748 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2749 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2750 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2751 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2752
2755 return false;
2756
2757 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
2758 .addReg(Src, 0, AMDGPU::sub0);
2759 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
2760 .addReg(Src, 0, AMDGPU::sub1);
2761 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2762 .addImm(0x7fffffff);
2763
2764
2765
2766 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2770 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2772 .addImm(AMDGPU::sub0)
2774 .addImm(AMDGPU::sub1);
2775
2776 MI.eraseFromParent();
2777 return true;
2778}
2779
2781 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
2782}
2783
2784void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
2786
2787 unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2789 MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
2790
2792
2793 if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
2794 return;
2795
2796 GEPInfo GEPInfo;
2797
2798 for (unsigned i = 1; i != 3; ++i) {
2803
2804
2805 assert(GEPInfo.Imm == 0);
2807 continue;
2808 }
2810 if (OpBank->getID() == AMDGPU::SGPRRegBankID)
2811 GEPInfo.SgprParts.push_back(GEPOp.getReg());
2812 else
2813 GEPInfo.VgprParts.push_back(GEPOp.getReg());
2814 }
2815
2817 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2818}
2819
2820bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
2821 return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
2822}
2823
2824bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
2825 if (!MI.hasOneMemOperand())
2826 return false;
2827
2830
2831
2832
2833
2834
2835 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
2836     isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
2837 return true;
2838
2840 return true;
2841
2842 if (MI.getOpcode() == AMDGPU::G_PREFETCH)
2843 return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
2844 AMDGPU::SGPRRegBankID;
2845
2847 return I && I->getMetadata("amdgpu.uniform");
2848}
2849
2850bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
2851 for (const GEPInfo &GEPInfo : AddrInfo) {
2852 if (!GEPInfo.VgprParts.empty())
2853 return true;
2854 }
2855 return false;
2856}
2857
2858void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
2859 const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
2864
2865
2866 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2868 }
2869}
2870
2871bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2873 initM0(I);
2875}
2876
2878 if (Reg.isPhysical())
2879 return false;
2880
2882 const unsigned Opcode = MI.getOpcode();
2883
2884 if (Opcode == AMDGPU::COPY)
2886
2887 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2888 Opcode == AMDGPU::G_XOR)
2891
2892 if (auto *GI = dyn_cast<GIntrinsic>(&MI))
2893 return GI->is(Intrinsic::amdgcn_class);
2894
2895 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
2896}
2897
2898bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
2903
2904 unsigned BrOpcode;
2907
2908
2909
2910
2911
2912
2913 if (!isVCC(CondReg, *MRI)) {
2915 return false;
2916
2917 CondPhysReg = AMDGPU::SCC;
2918 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
2919 ConstrainRC = &AMDGPU::SReg_32RegClass;
2920 } else {
2921
2922
2923
2924
2926 const bool Is64 = STI.isWave64();
2927 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
2928 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
2929
2930 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
2931 BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
2935 CondReg = TmpReg;
2936 }
2937
2938 CondPhysReg = TRI.getVCC();
2939 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
2940 ConstrainRC = TRI.getBoolRC();
2941 }
2942
2943 if (!MRI->getRegClassOrNull(CondReg))
2944 MRI->setRegClass(CondReg, ConstrainRC);
2945
2946 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
2948 BuildMI(*BB, &I, DL, TII.get(BrOpcode))
2949 .addMBB(I.getOperand(1).getMBB());
2950
2951 I.eraseFromParent();
2952 return true;
2953}
2954
2955bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
2957 Register DstReg = I.getOperand(0).getReg();
2959 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
2960 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
2961 if (IsVGPR)
2963
2965 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
2966}
2967
2968bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
2969 Register DstReg = I.getOperand(0).getReg();
2970 Register SrcReg = I.getOperand(1).getReg();
2971 Register MaskReg = I.getOperand(2).getReg();
2972 LLT Ty = MRI->getType(DstReg);
2973 LLT MaskTy = MRI->getType(MaskReg);
2976
2980 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
2981 if (DstRB != SrcRB)
2982 return false;
2983
2984
2985
2989
2990 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
2991 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
2992
2994 !CanCopyLow32 && !CanCopyHi32) {
2995 auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
2999 I.eraseFromParent();
3001 }
3002
3003 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3005 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3006
3010 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3011
3015 return false;
3016
3019 "ptrmask should have been narrowed during legalize");
3020
3021 auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
3024
3025 if (!IsVGPR)
3027 I.eraseFromParent();
3028 return true;
3029 }
3030
3031 Register HiReg = MRI->createVirtualRegister(&RegRC);
3032 Register LoReg = MRI->createVirtualRegister(&RegRC);
3033
3034
3035 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
3036 .addReg(SrcReg, 0, AMDGPU::sub0);
3037 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
3038 .addReg(SrcReg, 0, AMDGPU::sub1);
3039
3040 Register MaskedLo, MaskedHi;
3041
3042 if (CanCopyLow32) {
3043
3044 MaskedLo = LoReg;
3045 } else {
3046
3047 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3048 MaskedLo = MRI->createVirtualRegister(&RegRC);
3049
3050 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
3051 .addReg(MaskReg, 0, AMDGPU::sub0);
3052 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
3055 }
3056
3057 if (CanCopyHi32) {
3058
3059 MaskedHi = HiReg;
3060 } else {
3061 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3062 MaskedHi = MRI->createVirtualRegister(&RegRC);
3063
3064 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
3065 .addReg(MaskReg, 0, AMDGPU::sub1);
3066 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
3069 }
3070
3071 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3073 .addImm(AMDGPU::sub0)
3075 .addImm(AMDGPU::sub1);
3076 I.eraseFromParent();
3077 return true;
3078}
3079
3080
3081
3082static std::pair<Register, unsigned>
3088
3089 std::tie(IdxBaseReg, Offset) =
3091 if (IdxBaseReg == AMDGPU::NoRegister) {
3092
3093
3095 IdxBaseReg = IdxReg;
3096 }
3097
3099
3100
3101
3102 if (static_cast<unsigned>(Offset) >= SubRegs.size())
3103 return std::pair(IdxReg, SubRegs[0]);
3104 return std::pair(IdxBaseReg, SubRegs[Offset]);
3105}
3106
3107bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3109 Register DstReg = MI.getOperand(0).getReg();
3110 Register SrcReg = MI.getOperand(1).getReg();
3111 Register IdxReg = MI.getOperand(2).getReg();
3112
3113 LLT DstTy = MRI->getType(DstReg);
3114 LLT SrcTy = MRI->getType(SrcReg);
3115
3119
3120
3121
3122 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3123 return false;
3124
3126 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3128 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3129 if (!SrcRC || !DstRC)
3130 return false;
3134 return false;
3135
3139
3142 *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB);
3143
3144 if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
3146 return false;
3147
3148 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3150
3151 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3152 BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
3155 MI.eraseFromParent();
3156 return true;
3157 }
3158
3159 if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
3160 return false;
3161
3163 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3165 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3168 MI.eraseFromParent();
3169 return true;
3170 }
3171
3174 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
3178
3179 MI.eraseFromParent();
3180 return true;
3181}
3182
3183
3184bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3186 Register DstReg = MI.getOperand(0).getReg();
3187 Register VecReg = MI.getOperand(1).getReg();
3188 Register ValReg = MI.getOperand(2).getReg();
3189 Register IdxReg = MI.getOperand(3).getReg();
3190
3191 LLT VecTy = MRI->getType(DstReg);
3192 LLT ValTy = MRI->getType(ValReg);
3195
3199
3201
3202
3203
3204 if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3205 return false;
3206
3208 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3210 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3211
3216 return false;
3217
3218 if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3219 return false;
3220
3222 std::tie(IdxReg, SubReg) =
3224
3225 const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
3227
3230
3231 if (!IndexMode) {
3232 BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3234
3236 VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
3237 BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
3241 MI.eraseFromParent();
3242 return true;
3243 }
3244
3247 BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
3252
3253 MI.eraseFromParent();
3254 return true;
3255}
3256
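// Select a buffer-load-to-LDS intrinsic: pick the BUFFER_LOAD_*_LDS_{OFFSET,OFFEN,
// IDXEN,BOTHEN} opcode from the transfer size and the presence of vindex/voffset,
// point M0 at the LDS destination, and attach both load and store memory operands.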
3257bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3259 unsigned Opc;
3260 unsigned Size = MI.getOperand(3).getImm();
3261
3262
3263 const bool HasVIndex = MI.getNumOperands() == 9;
3265 int OpOffset = 0;
3266 if (HasVIndex) {
3267 VIndex = MI.getOperand(4).getReg();
3268 OpOffset = 1;
3269 }
3270
3271 Register VOffset = MI.getOperand(4 + OpOffset).getReg();
3272 std::optional<ValueAndVReg> MaybeVOffset =
3274 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3275
3276 switch (Size) {
3277 default:
3278 return false;
3279 case 1:
3280 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3281 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3282 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3283 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3284 break;
3285 case 2:
3286 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3287 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3288 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3289 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3290 break;
3291 case 4:
3292 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3293 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3294 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3295 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3296 break;
3297 case 12:
3299 return false;
3300
3301 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3302 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3303 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3304 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3305 break;
3306 case 16:
3308 return false;
3309
3310 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3311 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3312 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3313 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3314 break;
3315 }
3316
3319 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3321
3323
3324 if (HasVIndex && HasVOffset) {
3325 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3326 BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3328 .addImm(AMDGPU::sub0)
3330 .addImm(AMDGPU::sub1);
3331
3333 } else if (HasVIndex) {
3335 } else if (HasVOffset) {
3336 MIB.addReg(VOffset);
3337 }
3338
3339 MIB.add(MI.getOperand(1));
3340 MIB.add(MI.getOperand(5 + OpOffset));
3341 MIB.add(MI.getOperand(6 + OpOffset));
3343 unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
3348 ? 1
3349 : 0);
3350
3353 LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
3355 StorePtrI.V = nullptr;
3357
3362
3366
3367 MIB.setMemRefs({LoadMMO, StoreMMO});
3368
3369 MI.eraseFromParent();
3371}
3372
3373
3378
3379
3381 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3383
3384 assert(Def->getNumOperands() == 3 &&
3385 MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));
3387 return Def->getOperand(1).getReg();
3388 }
3389
3391}
3392
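// Select a global-load-to-LDS intrinsic: choose GLOBAL_LOAD_LDS_* from the transfer
// size, prefer an SGPR base with a VGPR offset (SADDR form) when possible, and point
// M0 at the LDS destination address.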
3393bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
3394 unsigned Opc;
3395 unsigned Size = MI.getOperand(3).getImm();
3396
3397 switch (Size) {
3398 default:
3399 return false;
3400 case 1:
3401 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3402 break;
3403 case 2:
3404 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3405 break;
3406 case 4:
3407 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3408 break;
3409 case 12:
3411 return false;
3412 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3413 break;
3414 case 16:
3416 return false;
3417 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3418 break;
3419 }
3420
3423 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3425
3428
3429
3430 if (!isSGPR(Addr)) {
3432 if (isSGPR(AddrDef->Reg)) {
3433 Addr = AddrDef->Reg;
3434 } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3437 if (isSGPR(SAddr)) {
3438 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3440 Addr = SAddr;
3441 VOffset = Off;
3442 }
3443 }
3444 }
3445 }
3446
3447 if (isSGPR(Addr)) {
3449 if (!VOffset) {
3450 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3451 BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
3453 }
3454 }
3455
3458
3459 if (isSGPR(Addr))
3460 MIB.addReg(VOffset);
3461
3462 MIB.add(MI.getOperand(4))
3464
3467 LoadPtrI.Offset = MI.getOperand(4).getImm();
3477 sizeof(int32_t), Align(4));
3478
3479 MIB.setMemRefs({LoadMMO, StoreMMO});
3480
3481 MI.eraseFromParent();
3483}
3484
3485bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{
3486 MI.setDesc(TII.get(MI.getOperand(1).getImm()));
3487 MI.removeOperand(1);
3488 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3489 return true;
3490}
3491
3492
3493
3494bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
3495 unsigned Opc;
3497 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3498 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3499 break;
3500 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3501 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3502 break;
3503 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3504 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3505 break;
3506 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3507 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3508 break;
3509 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3510 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3511 break;
3512 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3513 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3514 break;
3515 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3516 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3517 break;
3518 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3519 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3520 break;
3521 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3522 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3523 break;
3524 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3525 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3526 break;
3527 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3528 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3529 break;
3530 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3531 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3532 break;
3533 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3534 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3535 break;
3536 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3537 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3538 break;
3539 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3540 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3541 break;
3542 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3543 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3544 break;
3545 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3546 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3547 break;
3548 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3549 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3550 break;
3551 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3552 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3553 break;
3554 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3555 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3556 break;
3557 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3558 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3559 break;
3560 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3561 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3562 break;
3563 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3564 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3565 break;
3566 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3567 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3568 break;
3569 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3570 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3571 break;
3572 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3573 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3574 break;
3575 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3576 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3577 break;
3578 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3579 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3580 break;
3581 default:
3583 }
3584
3585 auto VDst_In = MI.getOperand(4);
3586
3587 MI.setDesc(TII.get(Opc));
3588 MI.removeOperand(4);
3589 MI.removeOperand(1);
3590 MI.addOperand(VDst_In);
3591 MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3592 return true;
3593}
3594
3595bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3597 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3599 return false;
3600 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3602 return false;
3603
3604 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3605 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3606 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3607
3608 MI.removeOperand(2);
3609 MI.setDesc(TII.get(Opcode));
3611
3614
3616}
3617
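// Lower G_AMDGPU_WAVE_ADDRESS by shifting the source right by log2(wavefront size),
// using V_LSHRREV_B32 for a VGPR destination and S_LSHR_B32 for an SGPR destination.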
3618bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
3619 Register DstReg = MI.getOperand(0).getReg();
3620 Register SrcReg = MI.getOperand(1).getReg();
3622 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3625
3626 if (IsVALU) {
3627 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3630 } else {
3631 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
3635 }
3636
3638 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3640 return false;
3641
3642 MI.eraseFromParent();
3643 return true;
3644}
3645
3646
3647
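// Recursively match a chain of G_AND/G_OR/G_XOR feeding a register, mapping up to
// three distinct operands onto the canonical constants 0xf0/0xcc/0xaa; returns the
// number of logic ops covered and the resulting 8-bit truth table for V_BITOP3.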
3651 unsigned NumOpcodes = 0;
3652 uint8_t LHSBits, RHSBits;
3653
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3665
3667 Bits = 0xff;
3668 return true;
3669 }
3671 Bits = 0;
3672 return true;
3673 }
3674
3675 for (unsigned I = 0; I < Src.size(); ++I) {
3676
3678 Bits = SrcBits[I];
3679 return true;
3680 }
3681
3682 if (Src[I] == R) {
3683 Bits = SrcBits[I];
3685 return true;
3686 }
3687 }
3688
3689 if (Src.size() == 3) {
3690
3691
3692
3696 for (unsigned I = 0; I < Src.size(); ++I) {
3698 Bits = ~SrcBits[I];
3699 return true;
3700 }
3701 }
3702 }
3703
3704 return false;
3705 }
3706
3707 Bits = SrcBits[Src.size()];
3708 Src.push_back(Op);
3709 return true;
3710 };
3711
3713 switch (MI->getOpcode()) {
3714 case TargetOpcode::G_AND:
3715 case TargetOpcode::G_OR:
3716 case TargetOpcode::G_XOR: {
3719
3721 if (!getOperandBits(LHS, LHSBits) ||
3722 !getOperandBits(RHS, RHSBits)) {
3723 Src = Backup;
3724 return std::make_pair(0, 0);
3725 }
3726
3727
3729 if (Op.first) {
3730 NumOpcodes += Op.first;
3731 LHSBits = Op.second;
3732 }
3733
3735 if (Op.first) {
3736 NumOpcodes += Op.first;
3737 RHSBits = Op.second;
3738 }
3739 break;
3740 }
3741 default:
3742 return std::make_pair(0, 0);
3743 }
3744
3746 switch (MI->getOpcode()) {
3747 case TargetOpcode::G_AND:
3748 TTbl = LHSBits & RHSBits;
3749 break;
3750 case TargetOpcode::G_OR:
3751 TTbl = LHSBits | RHSBits;
3752 break;
3753 case TargetOpcode::G_XOR:
3754 TTbl = LHSBits ^ RHSBits;
3755 break;
3756 default:
3757 break;
3758 }
3759
3760 return std::make_pair(NumOpcodes + 1, TTbl);
3761}
3762
3763bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
3765 return false;
3766
3767 Register DstReg = MI.getOperand(0).getReg();
3769 const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3770 if (!IsVALU)
3771 return false;
3772
3775 unsigned NumOpcodes;
3776
3777 std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
3778
3779
3780
3781 if (NumOpcodes < 2 || Src.empty())
3782 return false;
3783
3784 const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
3785 if (NumOpcodes == 2 && IsB32) {
3786
3787
3788
3792 return false;
3793 } else if (NumOpcodes < 4) {
3794
3795
3796
3797 return false;
3798 }
3799
3800 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
3804
3805 for (unsigned I = 0; I < Src.size(); ++I) {
3807 if (RB->getID() != AMDGPU::SGPRRegBankID)
3808 continue;
3809 if (CBL > 0) {
3810 --CBL;
3811 continue;
3812 }
3813 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3814 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), NewReg)
3816 Src[I] = NewReg;
3817 }
3818
3819
3820
3821
3822
3823
3824 while (Src.size() < 3)
3825 Src.push_back(Src[0]);
3826
3827 auto MIB = BuildMI(*MBB, MI, DL, TII.get(Opc), DstReg);
3828 if (!IsB32)
3829 MIB.addImm(0);
3831 if (!IsB32)
3832 MIB.addImm(0);
3834 if (!IsB32)
3835 MIB.addImm(0);
3838 if (!IsB32)
3839 MIB.addImm(0);
3840
3842 MI.eraseFromParent();
3843
3844 return true;
3845}
3846
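// Select G_STACKRESTORE: if the restored value is not already a wave address,
// convert it by shifting right by log2(wavefront size), then copy it into the
// stack pointer register.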
3847bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
3848 Register SrcReg = MI.getOperand(0).getReg();
3850 return false;
3851
3858
3859 if (!WaveAddr) {
3860 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3861 BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
3865 }
3866
3869
3870 MI.eraseFromParent();
3871 return true;
3872}
3873
3875
3876 if (!I.isPreISelOpcode()) {
3877 if (I.isCopy())
3878 return selectCOPY(I);
3879 return true;
3880 }
3881
3882 switch (I.getOpcode()) {
3883 case TargetOpcode::G_AND:
3884 case TargetOpcode::G_OR:
3885 case TargetOpcode::G_XOR:
3886 if (selectBITOP3(I))
3887 return true;
3889 return true;
3890 return selectG_AND_OR_XOR(I);
3891 case TargetOpcode::G_ADD:
3892 case TargetOpcode::G_SUB:
3893 case TargetOpcode::G_PTR_ADD:
3895 return true;
3896 return selectG_ADD_SUB(I);
3897 case TargetOpcode::G_UADDO:
3898 case TargetOpcode::G_USUBO:
3899 case TargetOpcode::G_UADDE:
3900 case TargetOpcode::G_USUBE:
3901 return selectG_UADDO_USUBO_UADDE_USUBE(I);
3902 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3903 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3904 return selectG_AMDGPU_MAD_64_32(I);
3905 case TargetOpcode::G_INTTOPTR:
3906 case TargetOpcode::G_BITCAST:
3907 case TargetOpcode::G_PTRTOINT:
3908 case TargetOpcode::G_FREEZE:
3909 return selectCOPY(I);
3910 case TargetOpcode::G_FNEG:
3912 return true;
3913 return selectG_FNEG(I);
3914 case TargetOpcode::G_FABS:
3916 return true;
3917 return selectG_FABS(I);
3918 case TargetOpcode::G_EXTRACT:
3919 return selectG_EXTRACT(I);
3920 case TargetOpcode::G_MERGE_VALUES:
3921 case TargetOpcode::G_CONCAT_VECTORS:
3922 return selectG_MERGE_VALUES(I);
3923 case TargetOpcode::G_UNMERGE_VALUES:
3924 return selectG_UNMERGE_VALUES(I);
3925 case TargetOpcode::G_BUILD_VECTOR:
3926 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
3927 return selectG_BUILD_VECTOR(I);
3928 case TargetOpcode::G_IMPLICIT_DEF:
3929 return selectG_IMPLICIT_DEF(I);
3930 case TargetOpcode::G_INSERT:
3931 return selectG_INSERT(I);
3932 case TargetOpcode::G_INTRINSIC:
3933 case TargetOpcode::G_INTRINSIC_CONVERGENT:
3934 return selectG_INTRINSIC(I);
3935 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3936 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
3937 return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
3938 case TargetOpcode::G_ICMP:
3939 case TargetOpcode::G_FCMP:
3940 if (selectG_ICMP_or_FCMP(I))
3941 return true;
3943 case TargetOpcode::G_LOAD:
3944 case TargetOpcode::G_ZEXTLOAD:
3945 case TargetOpcode::G_SEXTLOAD:
3946 case TargetOpcode::G_STORE:
3947 case TargetOpcode::G_ATOMIC_CMPXCHG:
3948 case TargetOpcode::G_ATOMICRMW_XCHG:
3949 case TargetOpcode::G_ATOMICRMW_ADD:
3950 case TargetOpcode::G_ATOMICRMW_SUB:
3951 case TargetOpcode::G_ATOMICRMW_AND:
3952 case TargetOpcode::G_ATOMICRMW_OR:
3953 case TargetOpcode::G_ATOMICRMW_XOR:
3954 case TargetOpcode::G_ATOMICRMW_MIN:
3955 case TargetOpcode::G_ATOMICRMW_MAX:
3956 case TargetOpcode::G_ATOMICRMW_UMIN:
3957 case TargetOpcode::G_ATOMICRMW_UMAX:
3958 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
3959 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
3960 case TargetOpcode::G_ATOMICRMW_FADD:
3961 case TargetOpcode::G_ATOMICRMW_FMIN:
3962 case TargetOpcode::G_ATOMICRMW_FMAX:
3963 return selectG_LOAD_STORE_ATOMICRMW(I);
3964 case TargetOpcode::G_SELECT:
3965 return selectG_SELECT(I);
3966 case TargetOpcode::G_TRUNC:
3967 return selectG_TRUNC(I);
3968 case TargetOpcode::G_SEXT:
3969 case TargetOpcode::G_ZEXT:
3970 case TargetOpcode::G_ANYEXT:
3971 case TargetOpcode::G_SEXT_INREG:
3972
3973
3974
3975 if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
3977 return true;
3978 return selectG_SZA_EXT(I);
3979 case TargetOpcode::G_FPEXT:
3980 if (selectG_FPEXT(I))
3981 return true;
3983 case TargetOpcode::G_BRCOND:
3984 return selectG_BRCOND(I);
3985 case TargetOpcode::G_GLOBAL_VALUE:
3986 return selectG_GLOBAL_VALUE(I);
3987 case TargetOpcode::G_PTRMASK:
3988 return selectG_PTRMASK(I);
3989 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3990 return selectG_EXTRACT_VECTOR_ELT(I);
3991 case TargetOpcode::G_INSERT_VECTOR_ELT:
3992 return selectG_INSERT_VECTOR_ELT(I);
3993 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3994 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3995 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3996 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3997 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4000 assert(Intr && "not an image intrinsic with image pseudo");
4001 return selectImageIntrinsic(I, Intr);
4002 }
4003 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
4004 return selectBVHIntrinsic(I);
4005 case AMDGPU::G_SBFX:
4006 case AMDGPU::G_UBFX:
4007 return selectG_SBFX_UBFX(I);
4008 case AMDGPU::G_SI_CALL:
4009 I.setDesc(TII.get(AMDGPU::SI_CALL));
4010 return true;
4011 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4012 return selectWaveAddress(I);
4013 case AMDGPU::G_STACKRESTORE:
4014 return selectStackRestore(I);
4015 case AMDGPU::G_PHI:
4016 return selectPHI(I);
4017 case TargetOpcode::G_CONSTANT:
4018 case TargetOpcode::G_FCONSTANT:
4019 default:
4021 }
4022 return false;
4023}
4024
4026AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
4027 return {{
4029 }};
4030
4031}
4032
4033std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4034 Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
4035 unsigned Mods = 0;
4037
4038 if (MI->getOpcode() == AMDGPU::G_FNEG) {
4039 Src = MI->getOperand(1).getReg();
4042 } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4043
4044
4047 if (LHS && LHS->isZero()) {
4049 Src = MI->getOperand(2).getReg();
4050 }
4051 }
4052
4053 if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
4054 Src = MI->getOperand(1).getReg();
4056 }
4057
4058 if (OpSel)
4060
4061 return std::pair(Src, Mods);
4062}
4063
4064Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4066 bool ForceVGPR) const {
4067 if ((Mods != 0 || ForceVGPR) &&
4068 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4069
4070
4071
4072
4075 TII.get(AMDGPU::COPY), VGPRSrc)
4077 Src = VGPRSrc;
4078 }
4079
4080 return Src;
4081}
4082
4083
4084
4085
4087AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
4088 return {{
4090 }};
4091}
4092
4094AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
4096 unsigned Mods;
4097 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4098
4099 return {{
4101 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4102 },
4106 }};
4107}
4108
4110AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
4112 unsigned Mods;
4113 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4114 true,
4115 false);
4116
4117 return {{
4119 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4120 },
4124 }};
4125}
4126
4128AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
4129 return {{
4133 }};
4134}
4135
4137AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
4139 unsigned Mods;
4140 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4141
4142 return {{
4144 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4145 },
4147 }};
4148}
4149
4151AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4154 unsigned Mods;
4155 std::tie(Src, Mods) =
4156 selectVOP3ModsImpl(Root.getReg(), false);
4157
4158 return {{
4160 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4161 },
4163 }};
4164}
4165
4167AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
4169 unsigned Mods;
4170 std::tie(Src, Mods) =
4171 selectVOP3ModsImpl(Root.getReg(), true,
4172 false);
4173
4174 return {{
4176 MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4177 },
4179 }};
4180}
4181
4183AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
4186 if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
4187 return {};
4188 return {{
4190 }};
4191}
4192
4193std::pair<Register, unsigned>
4194AMDGPUInstructionSelector::selectVOP3PModsImpl(
4196 unsigned Mods = 0;
4198
4199 if (MI->getOpcode() == AMDGPU::G_FNEG &&
4200
4201
4204 Src = MI->getOperand(1).getReg();
4205 MI = MRI.getVRegDef(Src);
4206 }
4207
4208
4209
4210
4211 (void)IsDOT;
4212
4213
4215
4216 return std::pair(Src, Mods);
4217}
4218
4220AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
4223
4225 unsigned Mods;
4226 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
4227
4228 return {{
4231 }};
4232}
4233
4235AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
4238
4240 unsigned Mods;
4241 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);
4242
4243 return {{
4246 }};
4247}
4248
4250AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
4251
4252
4253
4255 "expected i1 value");
4257 if (Root.getImm() == -1)
4259 return {{
4261 }};
4262}
4263
4265AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4268 "expected i1 value");
4270 if (Root.getImm() != 0)
4272
4273 return {{
4275 }};
4276}
4277
4282 switch (Elts.size()) {
4283 case 8:
4284 DstRegClass = &AMDGPU::VReg_256RegClass;
4285 break;
4286 case 4:
4287 DstRegClass = &AMDGPU::VReg_128RegClass;
4288 break;
4289 case 2:
4290 DstRegClass = &AMDGPU::VReg_64RegClass;
4291 break;
4292 default:
4294 }
4295
4297 auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
4298 .addDef(MRI.createVirtualRegister(DstRegClass));
4299 for (unsigned i = 0; i < Elts.size(); ++i) {
4300 MIB.addReg(Elts[i]);
4302 }
4304}
4305
4310 if (ModOpcode == TargetOpcode::G_FNEG) {
4312
4314 for (auto El : Elts) {
4317 break;
4319 }
4320 if (Elts.size() != NegAbsElts.size()) {
4321
4323 } else {
4324
4327 }
4328 } else {
4329 assert(ModOpcode == TargetOpcode::G_FABS);
4330
4333 }
4334}
4335
4337AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
4341
4342 if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
4343 assert(BV->getNumSources() > 0);
4344
4345 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
4346 unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
4347 ? AMDGPU::G_FNEG
4348 : AMDGPU::G_FABS;
4349 for (unsigned i = 0; i < BV->getNumSources(); ++i) {
4350 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
4351 if (ElF32->getOpcode() != ModOpcode)
4352 break;
4354 }
4355
4356
4357 if (BV->getNumSources() == EltsF32.size()) {
4359 *MRI);
4360 }
4361 }
4362
4365}
4366
4368AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
4372
4373 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
4374 for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4377 break;
4379 }
4380
4381
4382 if (CV->getNumSources() == EltsV2F16.size()) {
4386 }
4387 }
4388
4391}
4392
4394AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
4398
4399 if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
4400 assert(CV->getNumSources() > 0);
4401 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
4402
4403 unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
4404 ? AMDGPU::G_FNEG
4405 : AMDGPU::G_FABS;
4406
4407 for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4408 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
4409 if (ElV2F16->getOpcode() != ModOpcode)
4410 break;
4412 }
4413
4414
4415 if (CV->getNumSources() == EltsV2F16.size()) {
4418 *MRI);
4419 }
4420 }
4421
4424}
4425
4427AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
4428 std::optional<FPValueAndVReg> FPValReg;
4432 MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
4433 }}};
4434 }
4435
4436
4437 return {};
4438 }
4439
4443 return {
4445 }
4446 }
4447
4448 return {};
4449}
4450
4452AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
4455 unsigned Key = 0;
4456
4458 std::optional<ValueAndVReg> ShiftAmt;
4460 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4461 ShiftAmt->Value.getZExtValue() % 8 == 0) {
4462 Key = ShiftAmt->Value.getZExtValue() / 8;
4463 Src = ShiftSrc;
4464 }
4465
4466 return {{
4469 }};
4470}
4471
4473AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
4474
4477 unsigned Key = 0;
4478
4480 std::optional<ValueAndVReg> ShiftAmt;
4482 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4483 ShiftAmt->Value.getZExtValue() == 16) {
4484 Src = ShiftSrc;
4485 Key = 1;
4486 }
4487
4488 return {{
4491 }};
4492}
4493
4495AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
4497 unsigned Mods;
4498 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4499
4500
4501 return {{
4504 }};
4505}
4506
4508AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
4510 unsigned Mods;
4511 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4512 true,
4513 false,
4514 false);
4515
4516 return {{
4519 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
4520 },
4522 }};
4523}
4524
4526AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
4528 unsigned Mods;
4529 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4530 true,
4531 false,
4532 true);
4533
4534 return {{
4537 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
4538 },
4540 }};
4541}
4542
4543bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
4546 int64_t *Offset) const {
4549
4550
4551
4553 getAddrModeInfo(*MI, *MRI, AddrInfo);
4554
4555 if (AddrInfo.empty())
4556 return false;
4557
4558 const GEPInfo &GEPI = AddrInfo[0];
4559 std::optional<int64_t> EncodedImm;
4560
4561 if (SOffset && Offset) {
4563 true);
4564 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
4565 AddrInfo.size() > 1) {
4566 const GEPInfo &GEPI2 = AddrInfo[1];
4567 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
4570 Base = GEPI2.SgprParts[0];
4571 *SOffset = OffsetReg;
4572 *Offset = *EncodedImm;
4574 return true;
4575
4576
4577
4578
4579
4581 if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
4582 return false;
4583
4584 return true;
4585 }
4586 }
4587 }
4588 return false;
4589 }
4590
4592 false);
4593 if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
4594 Base = GEPI.SgprParts[0];
4595 *Offset = *EncodedImm;
4596 return true;
4597 }
4598
4599
4600 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
4601 GEPI.Imm != 0) {
4602
4603
4604
4605
4606 Base = GEPI.SgprParts[0];
4607 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4608 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
4610 return true;
4611 }
4612
4613 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
4615 Base = GEPI.SgprParts[0];
4616 *SOffset = OffsetReg;
4617 return true;
4618 }
4619 }
4620
4621 return false;
4622}
4623
4625AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
4628 if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
4629 return std::nullopt;
4630
4633}
4634
4636AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
4638 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
4639
4640 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
4641 return std::nullopt;
4642
4643 const GEPInfo &GEPInfo = AddrInfo[0];
4644 Register PtrReg = GEPInfo.SgprParts[0];
4645 std::optional<int64_t> EncodedImm =
4647 if (!EncodedImm)
4648 return std::nullopt;
4649
4650 return {{
4653 }};
4654}
4655
4657AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
4659 if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
4660 return std::nullopt;
4661
4664}
4665
4667AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
4670 if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
4671 return std::nullopt;
4672
4676}
4677
4678std::pair<Register, int>
4679AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
4680 uint64_t FlatVariant) const {
4682
4684
4687
4689 int64_t ConstOffset;
4690 std::tie(PtrBase, ConstOffset) =
4691 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4692
4694 !isFlatScratchBaseLegal(Root.getReg())))
4696
4697 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
4698 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
4700
4701 return std::pair(PtrBase, ConstOffset);
4702}
4703
4705AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
4706 auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);
4707
4708 return {{
4711 }};
4712}
4713
4715AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
4717
4718 return {{
4721 }};
4722}
4723
4725AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
4727
4728 return {{
4731 }};
4732}
4733
4734
4736AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
4739 int64_t ConstOffset;
4740 int64_t ImmOffset = 0;
4741
4742
4743
4744 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4745
4746 if (ConstOffset != 0) {
4749 Addr = PtrBase;
4750 ImmOffset = ConstOffset;
4751 } else {
4753 if (isSGPR(PtrBaseDef->Reg)) {
4754 if (ConstOffset > 0) {
4755
4756
4757
4758
4759
4760 int64_t SplitImmOffset, RemainderOffset;
4761 std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
4763
4764 if (isUInt<32>(RemainderOffset)) {
4768 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4769
4770 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4771 HighBits)
4772 .addImm(RemainderOffset);
4773
4774 return {{
4777 MIB.addReg(HighBits);
4778 },
4780 }};
4781 }
4782 }
4783
4784
4785
4786
4787
4788
4789 unsigned NumLiterals =
4793 return std::nullopt;
4794 }
4795 }
4796 }
4797
4798
4800 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4801
4804
4805 if (isSGPR(SAddr)) {
4806 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
4807
4808
4809
4813 },
4815 MIB.addReg(VOffset);
4816 },
4818 MIB.addImm(ImmOffset);
4819 }}};
4820 }
4821 }
4822 }
4823
4824
4825
4826 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
4827 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
4828 return std::nullopt;
4829
4830
4831
4834 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4835
4836 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
4838
4839 return {{
4843 }};
4844}
4845
4847AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4850 int64_t ConstOffset;
4851 int64_t ImmOffset = 0;
4852
4853
4854
4855 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4856
4857 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
4860 Addr = PtrBase;
4861 ImmOffset = ConstOffset;
4862 }
4863
4865 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4866 int FI = AddrDef->MI->getOperand(1).getIndex();
4867 return {{
4870 }};
4871 }
4872
4873 Register SAddr = AddrDef->Reg;
4874
4875 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4876 Register LHS = AddrDef->MI->getOperand(1).getReg();
4877 Register RHS = AddrDef->MI->getOperand(2).getReg();
4880
4881 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
4882 isSGPR(RHSDef->Reg)) {
4883 int FI = LHSDef->MI->getOperand(1).getIndex();
4887 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4888
4889 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
4891 .addReg(RHSDef->Reg)
4893 }
4894 }
4895
4896 if (!isSGPR(SAddr))
4897 return std::nullopt;
4898
4899 return {{
4902 }};
4903}
4904
4905
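// Detect when the flat-scratch SVS swizzle bug could trigger: the access is
// mis-swizzled if adding the low two bits of the VGPR and SGPR addresses carries
// out, so conservatively check that case using known bits of both operands.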
4906bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
4909 return false;
4910
4911
4912
4913
4917 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
4918 uint64_t SMax = SKnown.getMaxValue().getZExtValue();
4919 return (VMax & 3) + (SMax & 3) >= 4;
4920}
4921
4923AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
4926 int64_t ConstOffset;
4927 int64_t ImmOffset = 0;
4928
4929
4930
4931 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4932
4934 if (ConstOffset != 0 &&
4936 Addr = PtrBase;
4937 ImmOffset = ConstOffset;
4938 }
4939
4941 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
4942 return std::nullopt;
4943
4944 Register RHS = AddrDef->MI->getOperand(2).getReg();
4945 if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
4946 return std::nullopt;
4947
4948 Register LHS = AddrDef->MI->getOperand(1).getReg();
4950
4951 if (OrigAddr != Addr) {
4952 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
4953 return std::nullopt;
4954 } else {
4955 if (!isFlatScratchBaseLegalSV(OrigAddr))
4956 return std::nullopt;
4957 }
4958
4959 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
4960 return std::nullopt;
4961
4962 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4963 int FI = LHSDef->MI->getOperand(1).getIndex();
4964 return {{
4968 }};
4969 }
4970
4971 if (!isSGPR(LHS))
4972 return std::nullopt;
4973
4974 return {{
4978 }};
4979}
4980
4982AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
4987
4991 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4992
4993
4994
4996 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4997 HighBits)
4999
5001 MIB.addReg(Info->getScratchRSrcReg());
5002 },
5004 MIB.addReg(HighBits);
5005 },
5007
5008
5010 },
5013 }}};
5014 }
5015
5017
5018
5019
5020 std::optional<int> FI;
5022
5025 int64_t ConstOffset;
5026 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
5027 if (ConstOffset != 0) {
5031 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
5032 if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
5034 else
5035 VAddr = PtrBase;
5036 Offset = ConstOffset;
5037 }
5038 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5040 }
5041
5043 MIB.addReg(Info->getScratchRSrcReg());
5044 },
5046 if (FI)
5048 else
5050 },
5052
5053
5055 },
5058 }}};
5059}
5060
5061bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
5062 int64_t Offset) const {
5063 if (!isUInt<16>(Offset))
5064 return false;
5065
5067 return true;
5068
5069
5070
5072}
5073
5074bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
5075 int64_t Offset1,
5076 unsigned Size) const {
5077 if (Offset0 % Size != 0 || Offset1 % Size != 0)
5078 return false;
5079 if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
5080 return false;
5081
5083 return true;
5084
5085
5086
5088}
5089
5090
5092 return Addr->getOpcode() == TargetOpcode::G_OR ||
5093 (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
5095}
5096
5097
5098
5099
5100bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
5102
5104 return true;
5105
5106
5107
5109 return true;
5110
5113
5114 if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
5115 std::optional<ValueAndVReg> RhsValReg =
5117
5118
5119
5120
5121 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
5122 RhsValReg->Value.getSExtValue() > -0x40000000)
5123 return true;
5124 }
5125
5127}
5128
5129
5130
5131bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
5133
5135 return true;
5136
5137
5138
5140 return true;
5141
5145}
5146
5147
5148
5149bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
5151
5152
5154 return true;
5155
5158 std::optional<DefinitionAndSourceRegister> BaseDef =
5160 std::optional<ValueAndVReg> RHSOffset =
5163
5164
5165
5166
5167
5170 (RHSOffset->Value.getSExtValue() < 0 &&
5171 RHSOffset->Value.getSExtValue() > -0x40000000)))
5172 return true;
5173
5174 Register LHS = BaseDef->MI->getOperand(1).getReg();
5175 Register RHS = BaseDef->MI->getOperand(2).getReg();
5177}
5178
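// Return true if the G_AND masking a shift amount is redundant: the mask (possibly
// combined with known-zero bits of the other operand) already keeps at least
// ShAmtBits low bits, which are the only bits the shift instruction reads.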
5179bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
5180 unsigned ShAmtBits) const {
5181 assert(MI.getOpcode() == TargetOpcode::G_AND);
5182
5183 std::optional<APInt> RHS =
5185 if (!RHS)
5186 return false;
5187
5188 if (RHS->countr_one() >= ShAmtBits)
5189 return true;
5190
5192 return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
5193}
5194
5196AMDGPUInstructionSelector::selectMUBUFScratchOffset(
5200
5201 std::optional<DefinitionAndSourceRegister> Def =
5203 assert(Def && "this shouldn't be an optional result");
5205
5207 return {{
5209 MIB.addReg(Info->getScratchRSrcReg());
5210 },
5212 MIB.addReg(WaveBase);
5213 },
5215 }};
5216 }
5217
5219
5220
5226 return {};
5229 if (!WaveBase)
5230 return {};
5231
5232 return {{
5234 MIB.addReg(Info->getScratchRSrcReg());
5235 },
5237 MIB.addReg(WaveBase);
5238 },
5240 }};
5241 }
5242
5245 return {};
5246
5247 return {{
5249 MIB.addReg(Info->getScratchRSrcReg());
5250 },
5253 },
5255 }};
5256}
5257
5258std::pair<Register, unsigned>
5259AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
5261 int64_t ConstAddr = 0;
5262
5265 std::tie(PtrBase, Offset) =
5266 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
5267
5269 if (isDSOffsetLegal(PtrBase, Offset)) {
5270
5271 return std::pair(PtrBase, Offset);
5272 }
5273 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
5274
5275
5276
5278
5279
5280 }
5281
5282 return std::pair(Root.getReg(), 0);
5283}
5284
5286AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
5289 std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
5290 return {{
5293 }};
5294}
5295
5297AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
5298 return selectDSReadWrite2(Root, 4);
5299}
5300
5302AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
5303 return selectDSReadWrite2(Root, 8);
5304}
5305
5307AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
5308 unsigned Size) const {
5311 std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
5312 return {{
5316 }};
5317}
5318
5319std::pair<Register, unsigned>
5320AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
5321 unsigned Size) const {
5323 int64_t ConstAddr = 0;
5324
5327 std::tie(PtrBase, Offset) =
5328 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
5329
5331 int64_t OffsetValue0 = Offset;
5332 int64_t OffsetValue1 = Offset + Size;
5333 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
5334
5335 return std::pair(PtrBase, OffsetValue0 / Size);
5336 }
5337 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
5338
5339
5341
5342
5343 }
5344
5345 return std::pair(Root.getReg(), 0);
5346}
5347
5348
5349
5350
5351
5352std::pair<Register, int64_t>
5353AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
5356 if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
5357 return {Root, 0};
5358
5360 std::optional<ValueAndVReg> MaybeOffset =
5362 if (!MaybeOffset)
5363 return {Root, 0};
5364 return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
5365}
5366
5369}
5370
5371
5372
5376 Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5377 Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5378 Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5379 Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
5380
5381 B.buildInstr(AMDGPU::S_MOV_B32)
5382 .addDef(RSrc2)
5383 .addImm(FormatLo);
5384 B.buildInstr(AMDGPU::S_MOV_B32)
5385 .addDef(RSrc3)
5386 .addImm(FormatHi);
5387
5388
5389
5390
5391 B.buildInstr(AMDGPU::REG_SEQUENCE)
5392 .addDef(RSrcHi)
5393 .addReg(RSrc2)
5394 .addImm(AMDGPU::sub0)
5395 .addReg(RSrc3)
5396 .addImm(AMDGPU::sub1);
5397
5399 if (!BasePtr) {
5400 RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5401 B.buildInstr(AMDGPU::S_MOV_B64)
5402 .addDef(RSrcLo)
5403 .addImm(0);
5404 }
5405
5406 B.buildInstr(AMDGPU::REG_SEQUENCE)
5407 .addDef(RSrc)
5408 .addReg(RSrcLo)
5409 .addImm(AMDGPU::sub0_sub1)
5410 .addReg(RSrcHi)
5411 .addImm(AMDGPU::sub2_sub3);
5412
5413 return RSrc;
5414}
5415
5418 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
5419
5420
5421
5423}
5424
5427 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
5428
5429
5430
5432}
5433
5434AMDGPUInstructionSelector::MUBUFAddressData
5435AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
5436 MUBUFAddressData Data;
5437 Data.N0 = Src;
5438
5441
5442 std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
5443 if (isUInt<32>(Offset)) {
5444 Data.N0 = PtrBase;
5446 }
5447
5450 Data.N2 = InputAdd->getOperand(1).getReg();
5451 Data.N3 = InputAdd->getOperand(2).getReg();
5452
5453
5454
5455
5456
5457
5460 }
5461
5462 return Data;
5463}
5464
5465
5466bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
5467
5468
5469 if (Addr.N2)
5470 return true;
5471
5473 return N0Bank->getID() == AMDGPU::VGPRRegBankID;
5474}
5475
5476
5477
5478
5479void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
5482 return;
5483
5484
5485 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5486 B.buildInstr(AMDGPU::S_MOV_B32)
5487 .addDef(SOffset)
5488 .addImm(ImmOffset);
5489 ImmOffset = 0;
5490}
5491
5492bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
5495
5496
5498 return false;
5499
5500 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5501 if (!shouldUseAddr64(AddrData))
5502 return false;
5503
5507 Offset = AddrData.Offset;
5508
5509
5511
5512 if (N2) {
5513 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5515 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5516
5517
5518 VAddr = N0;
5519 } else {
5520 SRDPtr = N3;
5521 VAddr = N2;
5522 }
5523 } else {
5524
5525 SRDPtr = N2;
5526 VAddr = N3;
5527 }
5528 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5529
5530 VAddr = N0;
5531 } else {
5532
5533
5534 SRDPtr = N0;
5535 }
5536
5539 splitIllegalMUBUFOffset(B, SOffset, Offset);
5540 return true;
5541}
5542
5543bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
5545 int64_t &Offset) const {
5546
5547
5549 return false;
5550
5551 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5552 if (shouldUseAddr64(AddrData))
5553 return false;
5554
5555
5556
5557 Register SRDPtr = AddrData.N0;
5558 Offset = AddrData.Offset;
5559
5560
5562
5564 splitIllegalMUBUFOffset(B, SOffset, Offset);
5565 return true;
5566}
5567
5569AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
5574
5575 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
5576 return {};
5577
5578
5579
5580 return {{
5582 MIB.addReg(RSrcReg);
5583 },
5586 },
5588 if (SOffset)
5589 MIB.addReg(SOffset);
5591 MIB.addReg(AMDGPU::SGPR_NULL);
5592 else
5594 },
5597 },
5601 }};
5602}
5603
5605AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
5609
5610 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
5611 return {};
5612
5613 return {{
5615 MIB.addReg(RSrcReg);
5616 },
5618 if (SOffset)
5619 MIB.addReg(SOffset);
5621 MIB.addReg(AMDGPU::SGPR_NULL);
5622 else
5624 },
5629 }};
5630}
5631
5633AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
5634
5636
5638 SOffset = AMDGPU::SGPR_NULL;
5639
5641}
5642
5643
5644static std::optional<uint64_t>
5646
5648 if (!OffsetVal || !isInt<32>(*OffsetVal))
5649 return std::nullopt;
5650 return Lo_32(*OffsetVal);
5651}
5652
5654AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
5655 std::optional<uint64_t> OffsetVal =
5657 if (!OffsetVal)
5658 return {};
5659
5660 std::optional<int64_t> EncodedImm =
5662 if (!EncodedImm)
5663 return {};
5664
5666}
5667
5669AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
5671
5673 if (!OffsetVal)
5674 return {};
5675
5676 std::optional<int64_t> EncodedImm =
5678 if (!EncodedImm)
5679 return {};
5680
5682}
5683
5685AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5686
5687
5691 *MRI, Root.getReg(), KB, true);
5692 if (!SOffset)
5693 return std::nullopt;
5694
5695 std::optional<int64_t> EncodedOffset =
5697 if (!EncodedOffset)
5698 return std::nullopt;
5699
5703}
5704
5705std::pair<Register, unsigned>
5706AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
5707 bool &Matched) const {
5708 Matched = false;
5709
5711 unsigned Mods;
5712 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
5713
5716
5717
5718
5719
5721
5722 const auto CheckAbsNeg = [&]() {
5723
5724
5726 unsigned ModsTmp;
5727 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
5728
5731
5734 }
5735 };
5736
5737 CheckAbsNeg();
5738
5739
5740
5741
5742
5743
5745
5748 CheckAbsNeg();
5749 }
5750
5751 Matched = true;
5752 }
5753
5754 return {Src, Mods};
5755}
5756
5758AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
5761 unsigned Mods;
5762 bool Matched;
5763 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5764 if (!Matched)
5765 return {};
5766
5767 return {{
5770 }};
5771}
5772
5774AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
5776 unsigned Mods;
5777 bool Matched;
5778 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5779
5780 return {{
5783 }};
5784}
5785
5786bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
5790 Register CCReg = I.getOperand(0).getReg();
5791
5792 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
5793 .addImm(I.getOperand(2).getImm());
5794
5795 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
5796
5797 I.eraseFromParent();
5799 *MRI);
5800}
5801
5802bool AMDGPUInstructionSelector::selectSGetBarrierState(
5807 std::optional<int64_t> BarValImm =
5809
5810 if (!BarValImm) {
5811 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
5814 }
5816 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
5817 : AMDGPU::S_GET_BARRIER_STATE_M0;
5819
5820 auto DstReg = I.getOperand(0).getReg();
5822 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
5824 return false;
5826 if (BarValImm) {
5827 MIB.addImm(*BarValImm);
5828 }
5829 I.eraseFromParent();
5830 return true;
5831}
5832
5834 if (HasInlineConst) {
5835 switch (IntrID) {
5836 default:
5838 case Intrinsic::amdgcn_s_barrier_join:
5839 return AMDGPU::S_BARRIER_JOIN_IMM;
5840 case Intrinsic::amdgcn_s_get_named_barrier_state:
5841 return AMDGPU::S_GET_BARRIER_STATE_IMM;
5842 };
5843 } else {
5844 switch (IntrID) {
5845 default:
5847 case Intrinsic::amdgcn_s_barrier_join:
5848 return AMDGPU::S_BARRIER_JOIN_M0;
5849 case Intrinsic::amdgcn_s_get_named_barrier_state:
5850 return AMDGPU::S_GET_BARRIER_STATE_M0;
5851 };
5852 }
5853}
5854
5855bool AMDGPUInstructionSelector::selectNamedBarrierInit(
5861
5862
5863 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5864 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
5865 .add(BarOp)
5868
5869 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5870 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
5874
5875
5876 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5877 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg2)
5878 .add(CntOp)
5881
5882 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5883 constexpr unsigned ShAmt = 16;
5884 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg3)
5888
5889 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5890 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg4)
5894
5895 auto CopyMIB =
5896 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);
5898
5899 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
5900 ? AMDGPU::S_BARRIER_INIT_M0
5901 : AMDGPU::S_BARRIER_SIGNAL_M0;
5904
5905 I.eraseFromParent();
5906 return true;
5907}
5908
5909bool AMDGPUInstructionSelector::selectNamedBarrierInst(
5910    MachineInstr &I, Intrinsic::ID IntrID) const {
5911  MachineBasicBlock *MBB = I.getParent();
5912  const DebugLoc &DL = I.getDebugLoc();
5913  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
5914                             ? I.getOperand(2)
5915                             : I.getOperand(1);
5916  std::optional<int64_t> BarValImm =
5917      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
5918
5919  if (!BarValImm) {
5920    // BarID = (BarOp >> 4) & 0x3F
5921    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5922    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
5923        .add(BarOp)
5924        .addImm(4u)
5925        .setOperandDead(3); // Dead scc
5926
5927    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5928    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
5929        .addReg(TmpReg0)
5930        .addImm(0x3F)
5931        .setOperandDead(3); // Dead scc
5932
5933    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
5934                       .addReg(TmpReg1);
5935    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
5936  }
5937
5938  MachineInstrBuilder MIB;
5939  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
5940  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
5941
5942  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
5943    auto DstReg = I.getOperand(0).getReg();
5944    const TargetRegisterClass *DstRC =
5945        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
5946    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
5947      return false;
5948    MIB.addDef(DstReg);
5949  }
5950
5951  if (BarValImm) {
5952    auto BarId = ((*BarValImm) >> 4) & 0x3F;
5953    MIB.addImm(BarId);
5954  }
5955
5956  I.eraseFromParent();
5957  return true;
5958}
5959
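// Custom operand renderers referenced by the imported GlobalISel patterns.
// Each one turns a matched operand of MI into the immediate expected by the
// selected instruction.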
5960void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
5961                                                 const MachineInstr &MI,
5962                                                 int OpIdx) const {
5963 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5964 "Expected G_CONSTANT");
5965 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
5966}
5967
5968void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
5969                                                const MachineInstr &MI,
5970                                                int OpIdx) const {
5971 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5972 "Expected G_CONSTANT");
5973 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
5974}
5975
5976void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
5977                                                    const MachineInstr &MI,
5978                                                    int OpIdx) const {
5979  const MachineOperand &Op = MI.getOperand(1);
5980  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
5981 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
5982}
5983
5984void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
5985                                                const MachineInstr &MI,
5986                                                int OpIdx) const {
5987 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5988 "Expected G_CONSTANT");
5989 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
5990}
5991
5992
5993
5994void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
5995                                                const MachineInstr &MI,
5996                                                int OpIdx) const {
5998 int64_t Imm;
6001 else
6003}
6004
6005void AMDGPUInstructionSelector::renderZextBoolTImm(MachineInstrBuilder &MIB,
6006                                                    const MachineInstr &MI,
6007                                                    int OpIdx) const {
6008 MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);
6009}
6010
6011void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
6012                                                const MachineInstr &MI,
6013                                                int OpIdx) const {
6014  assert(OpIdx >= 0 && "expected to match an immediate operand");
6015  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
6016}
6017
6018void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6019    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6020 assert(OpIdx >= 0 && "expected to match an immediate operand");
6023}
6024
6025void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6026    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6027 assert(OpIdx >= 0 && "expected to match an immediate operand");
6028 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)
6031}
6032
6033void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6034    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6035 assert(OpIdx >= 0 && "expected to match an immediate operand");
6038}
6039
6040void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
6041    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6042 assert(OpIdx >= 0 && "expected to match an immediate operand");
6043 MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)
6045 : 0);
6046}
6047
6048void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
6049    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6050 assert(OpIdx >= 0 && "expected to match an immediate operand");
6052 : 0);
6053}
6054
6055void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
6056    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6057 assert(OpIdx >= 0 && "expected to match an immediate operand");
6059 : 0);
6060}
6061
6062void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
6063    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6064 assert(OpIdx >= 0 && "expected to match an immediate operand");
6067}
6068
6069void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
6070    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6071 assert(OpIdx >= 0 && "expected to match an immediate operand");
6074}
6075
6076void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
6077                                                  const MachineInstr &MI,
6078                                                  int OpIdx) const {
6079  assert(OpIdx >= 0 && "expected to match an immediate operand");
6080  MIB.addImm(MI.getOperand(OpIdx).getImm() &
6081             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
6082                                       : AMDGPU::CPol::ALL_pregfx12));
6083}
6084
6085void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
6086                                                 const MachineInstr &MI,
6087                                                 int OpIdx) const {
6088  assert(OpIdx >= 0 && "expected to match an immediate operand");
6089  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
6090                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
6091                                                 : AMDGPU::CPol::SWZ_pregfx12);
6092  MIB.addImm(Swizzle);
6093}
6094
6095void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
6096    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6097  assert(OpIdx >= 0 && "expected to match an immediate operand");
6098  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
6099                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
6100                                                  : AMDGPU::CPol::ALL_pregfx12);
6101  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
6102}
6103
6104void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
6105                                                 const MachineInstr &MI,
6106                                                 int OpIdx) const {
6107  MIB.addFrameIndex(MI.getOperand(1).getIndex());
6108}
6109
6110void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
6111                                                        const MachineInstr &MI,
6112                                                        int OpIdx) const {
6113  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
6114  int ExpVal = APF.getExactLog2Abs();
6115  assert(ExpVal != INT_MIN);
6116  MIB.addImm(ExpVal);
6117}
6118
6119void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
6120                                                const MachineInstr &MI,
6121                                                int OpIdx) const {
6122  // "round.towardzero" -> TowardZero 0        -> FP_ROUND_ROUND_TO_ZERO 3
6123  // "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
6124  // "round.upward"     -> TowardPositive 2    -> FP_ROUND_ROUND_TO_INF 1
6125  // "round.downward"   -> TowardNegative 3    -> FP_ROUND_ROUND_TO_NEGINF 2
6126 MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
6127}
6128
6129// Convert the 2-bit scale-select immediate into op_sel source modifier bits.
6130void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
6131    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6132  unsigned Val = MI.getOperand(OpIdx).getImm();
6133  unsigned New = 0;
6134  if (Val & 0x1)
6135    New |= SISrcMods::OP_SEL_0;
6136  if (Val & 0x2)
6137    New |= SISrcMods::OP_SEL_1;
6138  MIB.addImm(New);
6139}
6140
6141bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
6142  return TII.isInlineConstant(Imm);
6143}
6144
6145bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
6146  return TII.isInlineConstant(Imm);
6147}