LLVM: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Source File
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

// Observer to apply a register bank to new registers created by the
// LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
  MachineIRBuilder &B;
  const AMDGPURegisterBankInfo &RBI;
  MachineRegisterInfo &MRI;
  const RegisterBank *NewBank;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  ApplyRegBankMapping(MachineIRBuilder &B, const AMDGPURegisterBankInfo &RBI_,
                      MachineRegisterInfo &MRI_, const RegisterBank *RB)
      : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {
    assert(!B.isObservingChanges());
    B.setChangeObserver(*this);
  }

  ~ApplyRegBankMapping() override {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);

    B.stopObservingChanges();
  }

  /// Set any registers that don't have a set register class or bank to SALU.
  void applyBank(MachineInstr &MI) {
    const unsigned Opc = MI.getOpcode();
    if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
        Opc == AMDGPU::G_SEXT) {
      // LegalizerHelper wants to use the basic legalization artifacts when
      // widening etc. We don't handle selection with vcc in artifact sources,
      // so we need to use a select instead to handle these properly.
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
      if (SrcBank == &AMDGPU::VCCRegBank) {
        const LLT S32 = LLT::scalar(32);
        assert(MRI.getType(SrcReg) == LLT::scalar(1));
        assert(MRI.getType(DstReg) == S32);
        assert(NewBank == &AMDGPU::VGPRRegBank);

        // Replace the extension with a select, which really uses the boolean
        // source.
        B.setInsertPt(*MI.getParent(), MI);

        auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
        auto False = B.buildConstant(S32, 0);
        B.buildSelect(DstReg, SrcReg, True, False);
        MRI.setRegBank(True.getReg(0), *NewBank);
        MRI.setRegBank(False.getReg(0), *NewBank);
        MI.eraseFromParent();
      }

      assert(!MRI.getRegClassOrRegBank(DstReg));
      MRI.setRegBank(DstReg, *NewBank);
      return;
    }

#ifndef NDEBUG
    if (Opc == AMDGPU::G_TRUNC) {
      Register DstReg = MI.getOperand(0).getReg();
      const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, *RBI.TRI);
      assert(DstBank != &AMDGPU::VCCRegBank);
    }
#endif

    for (MachineOperand &Op : MI.operands()) {
      if (!Op.isReg())
        continue;

      // We may see physical registers if building a real MI.
      Register Reg = Op.getReg();
      if (Reg.isPhysical() || MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      if (MRI.getType(Reg) == LLT::scalar(1)) {
        assert(NewBank == &AMDGPU::VGPRRegBank &&
               "s1 operands should only be used for vector bools");
        assert((MI.getOpcode() != AMDGPU::G_TRUNC &&
                MI.getOpcode() != AMDGPU::G_ANYEXT) &&
               "not expecting legalization artifacts here");
        RB = &AMDGPU::VCCRegBank;
      }

      MRI.setRegBank(Reg, *RB);
    }
  }

  void erasingInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    // At this point, the instruction was just inserted and has no operands.
    NewInsts.push_back(&MI);
  }

  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {
    // FIXME: In principle we should probably add the instruction to NewInsts,
    // but the way the LegalizerHelper uses the observer, the registers we
    // need to set the bank on are also referenced in a new instruction.
  }
};

} // anonymous namespace

AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
    : Subtarget(ST), TRI(Subtarget.getRegisterInfo()),
      TII(Subtarget.getInstrInfo()) {

  // HACK: Until this is fully tablegen'd.
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [this]() {
    assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
           &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
           &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
    (void)this;
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

static bool isVectorRegisterBank(const RegisterBank &Bank) {
  unsigned BankID = Bank.getID();
  return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
}

bool AMDGPURegisterBankInfo::isDivergentRegBank(const RegisterBank *RB) const {
  return RB != &AMDGPU::SGPRRegBank;
}

unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          TypeSize Size) const {
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      (isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
    return std::numeric_limits<unsigned>::max();
  }

  // Bool values are tricky, because the meaning is based on the wave size.
  // The SGPR / VCC banks are for the natural scalar and vector conditions
  // produced by a compare.
  //
  // Legalization doesn't know about the necessary context, so an s1 use may
  // have been a truncate from an arbitrary value, in which case a copy
  // (lowered as a compare with 0) needs to be inserted.
  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (isVectorRegisterBank(Src) ||
       Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  // AGPR to AGPR copies are more expensive than VGPR copies.
  if (Dst.getID() == AMDGPU::AGPRRegBankID &&
      Src.getID() == AMDGPU::AGPRRegBankID)
    return 4;

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}

unsigned AMDGPURegisterBankInfo::getBreakDownCost(
    const ValueMapping &ValMapping,
    const RegisterBank *CurBank) const {
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
  // VGPR.
  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
    return 10; // This is expensive.

  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so it
  // is nearly free.
  // TODO: Cost of 0 hurts greedy mode.
  return 1;
}

const RegisterBank &
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                               LLT Ty) const {
  // Scalar booleans are promoted to SReg_32; any SGPR with an s1 type is
  // really a VCC-like use.
  if (TRI->isSGPRClass(&RC)) {
    // FIXME: This probably came from a copy from a physical register, which
    // should be inferable from the copied to-type. We don't have many boolean
    // copies, so it's a good bet that this is a scalar.
    if (!Ty.isValid())
      return AMDGPU::SGPRRegBank;

    return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
  }

  return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
}

template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  // getInstrMapping's default mapping uses ID 1, so start at 2.
  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}
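
// Each OpRegBankEntry row in the tables below pairs one register bank per
// tracked operand with a relative cost. RegBankSelect's greedy mode uses the
// cost to prefer rows that avoid fixups: a row costing 1 is directly legal,
// while higher costs model the extra readfirstlane or waterfall-loop code the
// less convenient bank assignment would require.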

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of first op.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of second op.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both ops.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, Table);
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    // rsrc, offset
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    // VGPR = M0, VGPR
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    // FIXME: Should have no register for immediate.
    static const OpRegBankEntry<1> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID }, 1 },

      // Need readlane.
      { { AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, Table);
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

// FIXME: Returns uniform if there's no source value information. This is
// probably wrong.
bool AMDGPURegisterBankInfo::isScalarLoadLegal(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const unsigned AS = MMO->getAddrSpace();
  const bool IsConst = AS == AMDGPUAS::CONSTANT_ADDRESS ||
                       AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  const unsigned MemSize = 8 * MMO->getSize().getValue();

  // Require 4-byte alignment, or a naturally aligned sub-dword access on
  // subtargets with scalar sub-word loads.
  return (MMO->getAlign() >= Align(4) ||
          (Subtarget.hasScalarSubwordLoads() &&
           ((MemSize == 16 && MMO->getAlign() >= Align(2)) ||
            (MemSize == 8 && MMO->getAlign() >= Align(1))))) &&
         // Can't do a scalar atomic load.
         !MMO->isAtomic() &&
         // Don't use scalar loads for volatile accesses to non-constant
         // address spaces.
         (IsConst || !MMO->isVolatile()) &&
         // Memory must be known constant, or not written before this load.
         (IsConst || MMO->isInvariant() || (MMO->getFlags() & MONoClobber)) &&
         AMDGPUInstrInfo::isUniformMMO(MMO);
}
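
// In short: an SMRD/SMEM load is only usable when the access is suitably
// aligned, non-atomic, non-volatile for non-constant address spaces, known
// not to be clobbered before the load, and uniform across the wave.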

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_IMPLICIT_DEF: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    if (Size == 1) {
      static const OpRegBankEntry<1> Table[3] = {
        { { AMDGPU::VGPRRegBankID }, 1 },
        { { AMDGPU::SGPRRegBankID }, 1 },
        { { AMDGPU::VCCRegBankID }, 1 }
      };

      return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
    }

    [[fallthrough]];
  }
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    if (Size == 1) {
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
      const InstructionMapping &SCCMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
        3); // Num Operands
      AltMappings.push_back(&SCCMapping);

      const InstructionMapping &VCCMapping0 = getInstructionMapping(
        2, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&VCCMapping0);
      return AltMappings;
    }

    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);
    break;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned PtrSize = PtrTy.getSizeInBits();
    unsigned AS = PtrTy.getAddressSpace();

    if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
         AS != AMDGPUAS::PRIVATE_ADDRESS) &&
        isScalarLoadLegal(MI)) {
      const InstructionMapping &SSMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
        2); // Num Operands
      AltMappings.push_back(&SSMapping);
    }

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // It may be possible to have a vgpr = load sgpr mapping here, because
    // the mubuf instructions support this kind of load, but probably for only
    // gfx7 and older. However, the addressing mode matching in the
    // instruction selector should be able to do a better job of detecting and
    // selecting these kinds of loads from the vgpr = load vgpr mapping.

    return AltMappings;

  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    // TODO: Change type to 32 for scalar.
    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT:
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AMDGPURegisterBankInfo::split64BitValueForMapping(
  MachineIRBuilder &B,
  SmallVector<Register, 2> &Regs,
  LLT HalfTy,
  Register Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}

/// Replace the current type each register in \p Regs has with \p NewTy.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
                          LLT NewTy) {
  for (Register Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}

static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getElementCount().isKnownMultipleOf(2));
    return LLT::scalarOrVector(Ty.getElementCount().divideCoefficientBy(2),
                               Ty.getElementType());
  }

  assert(Ty.getScalarSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getScalarSizeInBits() / 2);
}
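
// A minimal illustration of getHalfSizedType (hypothetical values):
//   getHalfSizedType(LLT::scalar(64))          == LLT::scalar(32)
//   getHalfSizedType(LLT::fixed_vector(4, 32)) == LLT::fixed_vector(2, 32)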

// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
// source value into a scalar register.
Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
                                                    MachineRegisterInfo &MRI,
                                                    Register Src) const {
  LLT Ty = MRI.getType(Src);
  const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);

  if (Bank == &AMDGPU::SGPRRegBank)
    return Src;

  unsigned Bits = Ty.getSizeInBits();
  assert(Bits % 32 == 0);

  if (Bank != &AMDGPU::VGPRRegBank) {
    // We need to copy from AGPR to VGPR.
    Src = B.buildCopy(Ty, Src).getReg(0);
    MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
  }

  LLT S32 = LLT::scalar(32);
  unsigned NumParts = Bits / 32;
  SmallVector<Register, 8> SrcParts;
  SmallVector<Register, 8> DstParts;

  if (Bits == 32) {
    SrcParts.push_back(Src);
  } else {
    auto Unmerge = B.buildUnmerge(S32, Src);
    for (unsigned i = 0; i < NumParts; ++i)
      SrcParts.push_back(Unmerge.getReg(i));
  }

  for (unsigned i = 0; i < NumParts; ++i) {
    Register SrcPart = SrcParts[i];
    Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    MRI.setType(DstPart, NumParts == 1 ? Ty : S32);

    const TargetRegisterClass *Constrained =
        constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
    (void)Constrained;
    assert(Constrained && "Failed to constrain readfirstlane src reg");

    B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});

    DstParts.push_back(DstPart);
  }

  if (Bits == 32)
    return DstParts[0];

  Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
  MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
  return Dst;
}
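
// V_READFIRSTLANE_B32 only moves 32 bits from the first active lane, so wider
// values are unmerged into 32-bit pieces, read one piece at a time, and then
// re-merged into an SGPR-banked result.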

/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs.
/// If any of the required SGPR operands are VGPRs, perform a waterfall loop
/// to execute the instruction for each unique combination of values in all
/// lanes in the wave. The block will be split such that rest of the
/// instructions are moved to a new block.
///
/// After this function returns, the insert point is set to the start of the
/// remainder block, after the loop.
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
    MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
    SmallSet<Register, 4> &SGPROperandRegs) const {
  // Track use registers which have already been expanded with a readfirstlane
  // sequence. This may have multiple uses if moving a sequence.
  DenseMap<Register, Register> WaterfalledRegMap;

  MachineBasicBlock &MBB = B.getMBB();
  MachineFunction *MF = &B.getMF();

  const AMDGPU::LaneMaskConstants &LMC =
      AMDGPU::LaneMaskConstants::get(Subtarget);
  const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();

#ifndef NDEBUG
  const int OrigRangeSize = std::distance(Range.begin(), Range.end());
#endif

  MachineRegisterInfo &MRI = *B.getMRI();
  Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
  Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(WaveRC);
  Register NewExec = MRI.createVirtualRegister(WaveRC);

  // To insert the loop we need to split the block. Move everything before
  // this point to a new block, and insert a new empty block before this
  // instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *BodyBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, BodyBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(BodyBB);
  BodyBB->addSuccessor(RestoreExecBB);
  BodyBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
      .addDef(PhiExec)
      .addReg(InitSaveExecReg)
      .addMBB(&MBB)
      .addReg(NewExec)
      .addMBB(BodyBB);

  const DebugLoc &DL = B.getDL();

  MachineInstr &FirstInst = *Range.begin();

  // Move the instruction into the loop body. Note we moved everything after
  // Range.end() already into a new block, so Range.end() is no longer valid.
  BodyBB->splice(BodyBB->end(), &MBB, Range.begin(), MBB.end());

  // Figure out the iterator range after splicing the instructions.
  MachineBasicBlock::iterator NewBegin = FirstInst.getIterator();
  auto NewEnd = BodyBB->end();

  B.setMBB(*LoopBB);

  LLT S1 = LLT::scalar(1);
  Register CondReg;

  assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);

  for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
    for (MachineOperand &Op : MI.all_uses()) {
      Register OldReg = Op.getReg();
      if (!SGPROperandRegs.count(OldReg))
        continue;

      // See if we already processed this register in another instruction in
      // the sequence.
      auto OldVal = WaterfalledRegMap.find(OldReg);
      if (OldVal != WaterfalledRegMap.end()) {
        Op.setReg(OldVal->second);
        continue;
      }

      Register OpReg = Op.getReg();
      LLT OpTy = MRI.getType(OpReg);

      const RegisterBank *OpBank = getRegBank(OpReg, MRI, *TRI);
      if (OpBank != &AMDGPU::VGPRRegBank) {
        // Insert copy from AGPR to VGPR before the loop.
        B.setMBB(MBB);
        OpReg = B.buildCopy(OpTy, OpReg).getReg(0);
        MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
        B.setMBB(*LoopBB);
      }

      Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg);

      // Build the comparison(s), splitting wide values into parts.
      unsigned OpSize = OpTy.getSizeInBits();
      bool Is64 = OpSize % 64 == 0;
      unsigned PartSize = Is64 ? 64 : 32;
      LLT PartTy = LLT::scalar(PartSize);
      unsigned NumParts = OpSize / PartSize;
      SmallVector<Register, 8> OpParts;
      SmallVector<Register, 8> CurrentLaneParts;

      if (NumParts == 1) {
        OpParts.push_back(OpReg);
        CurrentLaneParts.push_back(CurrentLaneReg);
      } else {
        auto UnmergeOp = B.buildUnmerge(PartTy, OpReg);
        auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg);
        for (unsigned i = 0; i < NumParts; ++i) {
          OpParts.push_back(UnmergeOp.getReg(i));
          CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i));
          MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
          MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
        }
      }

      for (unsigned i = 0; i < NumParts; ++i) {
        Register CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i],
                                      OpParts[i]).getReg(0);
        MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);

        if (!CondReg) {
          CondReg = CmpReg;
        } else {
          CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0);
          MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
        }
      }

      Op.setReg(CurrentLaneReg);

      // Make sure we don't re-process this register again.
      WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
    }
  }

  // The ballot becomes a no-op during instruction selection.
  CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
                             {LLT::scalar(Subtarget.isWave32() ? 32 : 64)})
                .addReg(CondReg)
                .getReg(0);
  MRI.setRegClass(CondReg, WaveRC);

  // Update EXEC, save the original EXEC value to the saveexec register.
  B.buildInstr(LMC.AndSaveExecOpc)
      .addDef(NewExec)
      .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  B.setInsertPt(*BodyBB, BodyBB->end());

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(LMC.XorTermOpc)
      .addDef(LMC.ExecReg)
      .addReg(LMC.ExecReg)
      .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(LMC.MovOpc), SaveExecReg)
      .addReg(LMC.ExecReg);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);

  // Set the insert point after the original instruction, so any new
  // instructions will be in the remainder.
  B.setInsertPt(*RemainderBB, RemainderBB->begin());

  return true;
}
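
// In outline, the CFG produced above for a waterfall loop is:
//
//   MBB:            save exec; fall through
//   LoopBB:         phi of the exec mask; v_readfirstlane each divergent
//                   operand; AND together the per-part "matches my lane"
//                   compares; s_and_saveexec with the ballot of the result
//   BodyBB:         run the rewritten instruction(s) for the matching lanes;
//                   xor the handled lanes out of exec; loop if any remain
//   RestoreExecBB:  restore the saved exec mask
//   RemainderBB:    the rest of the original block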

// Return any unique registers used by \p MI at \p OpIndices that need to be
// handled in a waterfall loop. Returns these registers in \p SGPROperandRegs.
// Returns true if there are any operands to handle and a waterfall loop is
// necessary.
bool AMDGPURegisterBankInfo::collectWaterfallOperands(
    SmallSet<Register, 4> &SGPROperandRegs, MachineInstr &MI,
    MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const {
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isReg());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() != AMDGPU::SGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  return !SGPROperandRegs.empty();
}

bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
    MachineIRBuilder &B, MachineInstr &MI,
    ArrayRef<unsigned> OpIndices) const {
  // Use a set to avoid extra readfirstlanes in the case where multiple
  // operands are the same register.
  SmallSet<Register, 4> SGPROperandRegs;

  if (!collectWaterfallOperands(SGPROperandRegs, MI, *B.getMRI(), OpIndices))
    return false;

  MachineBasicBlock::iterator I = MI.getIterator();
  return executeInWaterfallLoop(B, make_range(I, std::next(I)),
                                SGPROperandRegs);
}

// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
    MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const {
  Register Reg = MI.getOperand(OpIdx).getReg();
  MachineRegisterInfo &MRI = *B.getMRI();
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
  if (Bank == &AMDGPU::SGPRRegBank)
    return;

  Reg = buildReadFirstLane(B, MRI, Reg);
  MI.getOperand(OpIdx).setReg(Reg);
}
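
// Typical use: constrainOpWithReadfirstlane(B, MI, 2) forces operand 2 into
// an SGPR when it is expected to be wave-uniform; unlike a waterfall loop,
// no per-lane iteration is generated.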
1013
1014
1015
1017 unsigned TotalSize = Ty.getSizeInBits();
1018 if (!Ty.isVector())
1020
1023 assert(FirstSize % EltSize == 0);
1024
1025 unsigned FirstPartNumElts = FirstSize / EltSize;
1026 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1027
1030}
1031
1033 if (!Ty.isVector())
1035
1039}

bool AMDGPURegisterBankInfo::applyMappingLoad(
    MachineIRBuilder &B,
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineInstr &MI) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy = MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  MachineMemOperand *MMO = *MI.memoperands_begin();
  const unsigned MaxNonSmrdLoadSize = 128;

  const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  if (DstBank == &AMDGPU::SGPRRegBank) {
    // There are some special cases that we need to look at for 32 bit and 96
    // bit SGPR loads otherwise we have nothing to do.
    if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads()))
      return false;

    const unsigned MemSize = 8 * MMO->getSize().getValue();
    // Scalar loads of size 8 or 16 bit with proper alignment may be widened
    // to 32 bit. Check to see if we need to widen the memory access, 8 or 16
    // bit scalar loads should have a load size of 32 but memory access size
    // of less than 32.
    if (LoadSize == 32 &&
        (MemSize == 32 || LoadTy.isVector() || !isScalarLoadLegal(MI)))
      return false;

    if (LoadSize == 32 &&
        ((MemSize == 8 && MMO->getAlign() >= Align(1)) ||
         (MemSize == 16 && MMO->getAlign() >= Align(2))) &&
        isScalarLoadLegal(MI) &&
        Subtarget.hasScalarSubwordLoads())
      return false;

    Register PtrReg = MI.getOperand(1).getReg();

    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

    if (LoadSize == 32) {
      // This is an extending load from a sub-dword size. Widen the memory
      // access size to 4 bytes and clear the extra high bits appropriately.
      const LLT S32 = LLT::scalar(32);
      if (MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
        // Must extend the sign bit into higher bits for a G_SEXTLOAD.
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildSExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else if (MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
        // Must clear the higher bits for a G_ZEXTLOAD.
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildZExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else
        // We do not need to touch the higher bits for regular loads.
        B.buildLoadFromOffset(MI.getOperand(0), PtrReg, *MMO, 0);
    } else {
      // 96-bit loads are only available for vector loads. We need to split
      // this into a 64-bit part, and 32 (unless we can widen to a 128-bit
      // load).
      if (MMO->getAlign() < Align(16)) {
        LegalizerHelper Helper(B.getMF(), ApplyBank, B);
        LLT Part64, Part32;
        std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
        if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
            LegalizerHelper::Legalized)
          return false;
        return true;
      }
      LLT WiderTy = widen96To128(LoadTy);
      auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
      if (WiderTy.isScalar()) {
        B.buildTrunc(MI.getOperand(0), WideLoad);
      } else {
        B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
                                            WideLoad);
      }
    }

    MI.eraseFromParent();
    return true;
  }

  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<Register, 1> SrcRegs(OpdMapper.getVRegs(1));

  if (SrcRegs.empty())
    SrcRegs.push_back(MI.getOperand(1).getReg());

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  // Split the load into as many legal 128-bit pieces as necessary.
  assert(LoadSize % MaxNonSmrdLoadSize == 0);
  unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
  const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
  ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
  LegalizerHelper Helper(B.getMF(), O, B);

  if (LoadTy.isVector()) {
    if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
        LegalizerHelper::Legalized)
      return false;
  } else {
    if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
      return false;
  }

  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
  return true;
}
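
// Two repair strategies are used above: widen sub-dword scalar loads so they
// stay on the SALU, and split VGPR loads wider than 128 bits into legal
// 128-bit pieces via the LegalizerHelper.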

bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
    MachineIRBuilder &B,
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineInstr &MI) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  const MachineFunction &MF = B.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const auto &TFI = *ST.getFrameLowering();

  // Guard in case the stack growth direction ever changes with scratch
  // instructions.
  assert(TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp &&
         "Stack grows upwards for AMDGPU");

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  const RegisterBank *SizeBank = getRegBank(AllocSize, MRI, *TRI);

  if (SizeBank != &AMDGPU::SGPRRegBank) {
    auto WaveReduction =
        B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {LLT::scalar(32)})
            .addUse(AllocSize)
            .addImm(0);
    AllocSize = WaveReduction.getReg(0);
  }

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  Register SPReg = Info->getStackPtrOffsetReg();
  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
  auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);

  auto OldSP = B.buildCopy(PtrTy, SPReg);
  if (Alignment > TFI.getStackAlign()) {
    auto StackAlignMask = (Alignment.value() << ST.getWavefrontSizeLog2()) - 1;
    auto Tmp1 = B.buildPtrAdd(PtrTy, OldSP,
                              B.buildConstant(LLT::scalar(32), StackAlignMask));
    B.buildMaskLowPtrBits(Dst, Tmp1,
                          Log2(Alignment) + ST.getWavefrontSizeLog2());
  } else {
    B.buildCopy(Dst, OldSP);
  }
  auto PtrAdd = B.buildPtrAdd(PtrTy, Dst, ScaledSize);
  B.buildCopy(SPReg, PtrAdd);
  MI.eraseFromParent();
  return true;
}
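
// The allocation size is scaled by the wavefront size (the shl by
// getWavefrontSizeLog2) because scratch addressing interleaves lanes: the
// stack pointer advances in per-wave units, so a per-lane allocation must be
// multiplied by the number of lanes.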

bool AMDGPURegisterBankInfo::applyMappingImage(
    MachineIRBuilder &B, MachineInstr &MI,
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    int RsrcIdx) const {
  const int NumDefs = MI.getNumExplicitDefs();

  // The reported argument index is relative to the IR intrinsic call
  // arguments, so we need to shift by the number of defs and the intrinsic ID.
  RsrcIdx += NumDefs + 1;

  // Insert copies to VGPR arguments.
  applyDefaultMapping(OpdMapper);

  // Fixup any SGPR arguments.
  SmallVector<unsigned, 4> SGPRIndexes;
  for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;

    // If this intrinsic has a sampler, it immediately follows rsrc.
    if (I == RsrcIdx || I == RsrcIdx + 1)
      SGPRIndexes.push_back(I);
  }

  executeInWaterfallLoop(B, MI, SGPRIndexes);
  return true;
}

// Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
// the three offsets (voffset, soffset and instoffset).
unsigned AMDGPURegisterBankInfo::setBufferOffsets(
    MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg,
    Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const {
  const LLT S32 = LLT::scalar(32);
  MachineRegisterInfo *MRI = B.getMRI();

  if (std::optional<int64_t> Imm =
          getIConstantVRegSExtVal(CombinedOffset, *MRI)) {
    uint32_t SOffset, ImmOffset;
    if (TII->splitMUBUFOffset(*Imm, SOffset, ImmOffset, Alignment)) {
      VOffsetReg = B.buildConstant(S32, 0).getReg(0);
      SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
      InstOffsetVal = ImmOffset;

      B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
      B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
      return SOffset + ImmOffset;
    }
  }

  Register Base;
  unsigned Offset;

  std::tie(Base, Offset) =
      AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);

  uint32_t SOffset, ImmOffset;
  if ((int)Offset > 0 &&
      TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {
    if (getRegBank(Base, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
      VOffsetReg = Base;
      SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
      B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
      InstOffsetVal = ImmOffset;
      return 0;
    }

    // If we have an SGPR base, we can use it for soffset.
    if (SOffset == 0) {
      VOffsetReg = B.buildConstant(S32, 0).getReg(0);
      B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
      SOffsetReg = Base;
      InstOffsetVal = ImmOffset;
      return 0;
    }
  }

  // Handle the variable sgpr + vgpr case.
  MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
  if (Add && (int)Offset >= 0) {
    Register Src0 = getSrcRegIgnoringCopies(Add->getOperand(1).getReg(), *MRI);
    Register Src1 = getSrcRegIgnoringCopies(Add->getOperand(2).getReg(), *MRI);

    const RegisterBank *Src0Bank = getRegBank(Src0, *MRI, *TRI);
    const RegisterBank *Src1Bank = getRegBank(Src1, *MRI, *TRI);

    if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
      VOffsetReg = Src0;
      SOffsetReg = Src1;
      return 0;
    }

    if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
      VOffsetReg = Src1;
      SOffsetReg = Src0;
      return 0;
    }
  }

  // Ensure we have a VGPR for the combined offset. This could be an issue if
  // we have SGPR offsets and a VGPR resource.
  if (getRegBank(CombinedOffset, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
    VOffsetReg = CombinedOffset;
  } else {
    VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
  }

  SOffsetReg = B.buildConstant(S32, 0).getReg(0);
  B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
  return 0;
}
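
// setBufferOffsets splits a combined buffer offset into the three MUBUF
// addressing components: a VGPR voffset, an SGPR soffset, and an immediate
// instruction offset. The return value is the known constant byte offset
// (used for the memory operand) when the whole offset folds to a constant,
// otherwise 0.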

static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
    return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
  default:
    break;
  }
  llvm_unreachable("Unexpected s_buffer_load opcode");
}

bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  const LLT S32 = LLT::scalar(32);
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  const RegisterBank *RSrcBank =
      OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
  const RegisterBank *OffsetBank =
      OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
  if (RSrcBank == &AMDGPU::SGPRRegBank &&
      OffsetBank == &AMDGPU::SGPRRegBank)
    return true; // Legal mapping

  // FIXME: 96-bit case was widened during legalize. We need to narrow it back
  // here but don't have an MMO.

  unsigned LoadSize = Ty.getSizeInBits();
  int NumLoads = 1;
  if (LoadSize == 256 || LoadSize == 512) {
    NumLoads = LoadSize / 128;
    Ty = Ty.divide(NumLoads);
  }

  // Use the alignment to ensure that the required offsets will fit into the
  // immediate offsets.
  const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);

  MachineFunction &MF = B.getMF();

  Register SOffset;
  Register VOffset;
  int64_t ImmOffset = 0;

  unsigned MMOOffset = setBufferOffsets(B, MI.getOperand(2).getReg(), VOffset,
                                        SOffset, ImmOffset, Alignment);

  // TODO: 96-bit loads were widened to 128-bit results. Shrink the result if
  // we can.
  const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
  const Align MemAlign(4); // FIXME: ABI type alignment?
  MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
      MachinePointerInfo(),
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
          MachineMemOperand::MOInvariant,
      MemSize, MemAlign);
  if (MMOOffset != 0)
    BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset, MemSize);

  // If only the offset is divergent, emit a MUBUF buffer load instead. We can
  // assume that the buffer is unswizzled.

  Register RSrc = MI.getOperand(1).getReg();
  Register VIndex = B.buildConstant(S32, 0).getReg(0);
  B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);

  SmallVector<Register, 4> LoadParts(NumLoads);

  MachineBasicBlock::iterator MII = MI.getIterator();
  MachineInstrSpan Span(MII, &B.getMBB());

  for (int i = 0; i < NumLoads; ++i) {
    if (NumLoads == 1) {
      LoadParts[i] = Dst;
    } else {
      LoadParts[i] = MRI.createGenericVirtualRegister(Ty);
      MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
    }

    MachineMemOperand *MMO = BaseMMO;
    if (i != 0)
      MMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);

    B.buildInstr(getSBufferLoadCorrespondingBufferLoadOpcode(MI.getOpcode()))
        .addDef(LoadParts[i])       // vdata
        .addUse(RSrc)               // rsrc
        .addUse(VIndex)             // vindex
        .addUse(VOffset)            // voffset
        .addUse(SOffset)            // soffset
        .addImm(ImmOffset + 16 * i) // offset(imm)
        .addImm(0)                  // cachepolicy, swizzled buffer(imm)
        .addImm(0)                  // idxen(imm)
        .addMemOperand(MMO);
  }

  // TODO: If only the resource is a VGPR, it may be better to execute the
  // scalar load in the waterfall loop if the resource is expected to
  // frequently be dynamically uniform.
  if (RSrcBank != &AMDGPU::SGPRRegBank) {
    // Remove the original instruction to avoid potentially confusing the
    // waterfall loop logic.
    B.setInstr(*Span.begin());
    MI.eraseFromParent();

    SmallSet<Register, 4> OpsToWaterfall;

    OpsToWaterfall.insert(RSrc);
    executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
                           OpsToWaterfall);
  }

  if (NumLoads != 1) {
    if (Ty.isVector())
      B.buildConcatVectors(Dst, LoadParts);
    else
      B.buildMergeLikeInstr(Dst, LoadParts);
  }

  // We removed the instruction earlier with a waterfall loop.
  if (RSrcBank == &AMDGPU::SGPRRegBank)
    MI.eraseFromParent();

  return true;
}
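
// When any input is divergent, the scalar buffer load is rewritten as one or
// more 128-bit buffer loads; a divergent resource additionally wraps the new
// loads in a waterfall loop, while a divergent offset alone is legal for the
// VGPR voffset operand.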

bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
                                             const OperandsMapper &OpdMapper,
                                             bool Signed) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  // Insert basic copies.
  applyDefaultMapping(OpdMapper);

  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);

  const LLT S32 = LLT::scalar(32);

  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  Register SrcReg = MI.getOperand(FirstOpnd).getReg();
  Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
  Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();

  const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  if (DstBank == &AMDGPU::VGPRRegBank) {
    if (Ty == S32)
      return true;

    // There is no 64-bit vgpr bitfield extract instruction, so the operation
    // is expanded to a sequence of instructions that implement the operation.
    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

    const LLT S64 = LLT::scalar(64);
    // Shift the source operand so that extracted bits start at bit 0.
    auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
                              : B.buildLShr(S64, SrcReg, OffsetReg);
    auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);

    // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
    // if the width is a constant.
    if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
      // Use the 32-bit bitfield extract instruction if the width is a
      // constant. Depending on the width size, use either the low or high
      // 32-bits.
      auto Zero = B.buildConstant(S32, 0);
      auto WidthImm = ConstWidth->Value.getZExtValue();
      if (WidthImm <= 32) {
        // Use bitfield extract on the lower 32-bit source, and then
        // sign-extend or clear the upper 32-bits.
        auto Extract =
            Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
                   : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
        auto Extend =
            Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
        B.buildMergeLikeInstr(DstReg, {Extract, Extend});
      } else {
        // Use bitfield extract on upper 32-bit source, and combine with lower
        // 32-bit source.
        auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
        auto Extract =
            Signed
                ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
                : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
        B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
      }
      MI.eraseFromParent();
      return true;
    }

    // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
    // operations.
    auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
    auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
    if (Signed)
      B.buildAShr(S64, SignBit, ExtShift);
    else
      B.buildLShr(S64, SignBit, ExtShift);
    MI.eraseFromParent();
    return true;
  }

  // The scalar form packs the offset and width in a single operand.

  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  // Ensure the high bits are clear to insert the offset.
  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);

  // Zeros out the low bits, so don't bother clamping the input value.
  auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));

  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);

  // TODO: It might be worth using a pseudo here to avoid scc clobber and
  // register class constraints.
  unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
                             (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);

  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
    llvm_unreachable("failed to constrain BFE");

  MI.eraseFromParent();
  return true;
}
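
// The scalar path relies on the S_BFE encoding: the second source packs the
// field offset into bits [5:0] and the field width into bits [22:16], which
// is exactly what the and/shl/or sequence above constructs.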

bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  // Insert basic copies.
  applyDefaultMapping(OpdMapper);

  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register Src0 = MI.getOperand(2).getReg();
  Register Src1 = MI.getOperand(3).getReg();
  Register Src2 = MI.getOperand(4).getReg();

  if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
    return true;

  bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  LLT S1 = LLT::scalar(1);
  LLT S32 = LLT::scalar(32);

  bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
  bool Accumulate = true;

  if (!DstOnValu) {
    if (mi_match(Src2, MRI, m_ZeroInt()))
      Accumulate = false;
  }

  // Keep the multiplication on the SALU.
  Register DstHi;
  Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
  bool MulHiInVgpr = false;

  MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);

  if (Subtarget.hasSMulHi()) {
    DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
                       : B.buildSMulH(S32, Src0, Src1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
  } else {
    Register VSrc0 = B.buildCopy(S32, Src0).getReg(0);
    Register VSrc1 = B.buildCopy(S32, Src1).getReg(0);

    MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
    MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);

    DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
                       : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);

    if (!DstOnValu) {
      DstHi = buildReadFirstLane(B, MRI, DstHi);
    } else {
      MulHiInVgpr = true;
    }
  }

  // Accumulate and produce the "carry-out" bit.
  //
  // The "carry-out" is defined as bit 64 of the result when computed as a
  // big integer. For unsigned multiply-add, this matches the usual definition
  // of carry-out. For signed multiply-add, bit 64 is the sign bit of the
  // result, which is determined as:
  //   sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add
  LLT CarryType = DstOnValu ? S1 : S32;
  const RegisterBank &CarryBank =
      DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
  const RegisterBank &DstBank =
      DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
  Register Carry;
  Register Zero;

  if (!IsUnsigned) {
    Zero = B.buildConstant(S32, 0).getReg(0);
    MRI.setRegBank(Zero,
                   MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);

    Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero)
                .getReg(0);
    MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
                                      : AMDGPU::SGPRRegBank);

    if (DstOnValu && !MulHiInVgpr) {
      Carry = B.buildTrunc(S1, Carry).getReg(0);
      MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
    }
  }

  if (Accumulate) {
    if (DstOnValu) {
      DstLo = B.buildCopy(S32, DstLo).getReg(0);
      DstHi = B.buildCopy(S32, DstHi).getReg(0);
      MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
      MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
    }

    auto Unmerge = B.buildUnmerge(S32, Src2);
    Register Src2Lo = Unmerge.getReg(0);
    Register Src2Hi = Unmerge.getReg(1);
    MRI.setRegBank(Src2Lo, DstBank);
    MRI.setRegBank(Src2Hi, DstBank);

    if (!IsUnsigned) {
      auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero);
      MRI.setRegBank(Src2Sign.getReg(0), CarryBank);

      Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
      MRI.setRegBank(Carry, CarryBank);
    }

    auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
    DstLo = AddLo.getReg(0);
    Register CarryLo = AddLo.getReg(1);
    MRI.setRegBank(DstLo, DstBank);
    MRI.setRegBank(CarryLo, CarryBank);

    auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
    DstHi = AddHi.getReg(0);
    MRI.setRegBank(DstHi, DstBank);

    Register CarryHi = AddHi.getReg(1);
    MRI.setRegBank(CarryHi, CarryBank);

    if (IsUnsigned) {
      Carry = CarryHi;
    } else {
      Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
      MRI.setRegBank(Carry, CarryBank);
    }
  } else {
    if (IsUnsigned) {
      Carry = B.buildConstant(CarryType, 0).getReg(0);
      MRI.setRegBank(Carry, CarryBank);
    }
  }

  B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});

  if (DstOnValu) {
    B.buildCopy(Dst1, Carry);
  } else {
    B.buildTrunc(Dst1, Carry);
  }

  MI.eraseFromParent();
  return true;
}
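
// For the signed case, the carry-out is roughly reconstructed from 32-bit
// pieces: bit 64 of the infinite-precision result is sign(mul_hi) xor
// sign(src2) xor the carry of the unsigned 64-bit add, which is what the
// buildXor chain above computes.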

// Choose the extension opcode that preserves the semantics of Opc when its
// operands are widened.
static unsigned getExtendOp(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    return TargetOpcode::G_SEXT;
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    return TargetOpcode::G_ZEXT;
  default:
    return TargetOpcode::G_ANYEXT;
  }
}

// Take a source register (of type <2 x s16>) and return the low and high
// 32-bit halves, extended with the given opcode.
static std::pair<Register, Register>
unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
  const LLT S32 = LLT::scalar(32);
  auto Bitcast = B.buildBitcast(S32, Src);

  if (ExtOpcode == TargetOpcode::G_SEXT) {
    auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);
    auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));
  if (ExtOpcode == TargetOpcode::G_ZEXT) {
    auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  assert(ExtOpcode == TargetOpcode::G_ANYEXT);
  return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
}
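
// Example (hypothetical values): unpacking a <2 x s16> x with G_ZEXT yields
//   lo = and(bitcast<s32>(x), 0xffff)
//   hi = lshr(bitcast<s32>(x), 16)
// so each half can be operated on as a full 32-bit scalar.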

// If the mapping already split this operand into a single new virtual
// register, substitute it into the instruction.
static bool substituteSimpleCopyRegs(
  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
  SmallVector<Register, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
  if (!SrcReg.empty()) {
    assert(SrcReg.size() == 1);
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
    return true;
  }

  return false;
}

/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
                                                MachineRegisterInfo &MRI,
                                                Register Reg) const {
  if (!Subtarget.hasUnpackedD16VMem())
    return Reg;

  const LLT S16 = LLT::scalar(16);
  LLT StoreVT = MRI.getType(Reg);
  if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
    return Reg;

  auto Unmerge = B.buildUnmerge(S16, Reg);

  SmallVector<Register, 4> WideRegs;
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    WideRegs.push_back(Unmerge.getReg(I));

  const LLT S32 = LLT::scalar(32);
  int NumElts = StoreVT.getNumElements();

  return B.buildMergeLikeInstr(LLT::fixed_vector(NumElts, S32), WideRegs)
      .getReg(0);
}

static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  int64_t Const;
  if (mi_match(Reg, MRI, m_ICst(Const)))
    return std::pair(Register(), Const);

  Register Base;
  if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
    return std::pair(Base, Const);

  // TODO: Handle G_OR used for add case.
  return std::pair(Reg, 0);
}

std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
                                           Register OrigOffset) const {
  const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
  Register BaseReg;
  unsigned ImmOffset;
  const LLT S32 = LLT::scalar(32);

  std::tie(BaseReg, ImmOffset) = ::getBaseWithConstantOffset(*B.getMRI(),
                                                             OrigOffset);

  unsigned C1 = 0;
  if (ImmOffset != 0) {
    // If the immediate value is too big for the immoffset field, put only
    // bits that would normally fit in the immoffset field. The remaining
    // value that is copied/added for the voffset field is a large power of 2,
    // and it stands more chance of being CSEd with the copy/add for another
    // similar load/store.
    // However, do not do that rounding down if that is a negative number, as
    // it appears to be illegal to have a negative offset in the vgpr, even if
    // adding the immediate offset makes it positive.
    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
      Overflow += ImmOffset;
      ImmOffset = 0;
    }

    C1 = ImmOffset;
    if (Overflow != 0) {
      if (!BaseReg)
        BaseReg = B.buildConstant(S32, Overflow).getReg(0);
      else {
        auto OverflowVal = B.buildConstant(S32, Overflow);
        BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
      }
    }
  }

  if (!BaseReg)
    BaseReg = B.buildConstant(S32, 0).getReg(0);

  return {BaseReg, C1};
}

bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
                                        Register SrcReg) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.getSizeInBits() == 32) {
    // Use a v_mov_b32 here to make the exec dependency explicit.
    B.buildInstr(AMDGPU::V_MOV_B32_e32)
      .addDef(DstReg)
      .addUse(SrcReg);
    return constrainGenericRegister(DstReg, AMDGPU::VGPR_32RegClass, MRI) &&
           constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI);
  }

  Register TmpReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addDef(TmpReg0)
    .addUse(SrcReg, 0, AMDGPU::sub0);
  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addDef(TmpReg1)
    .addUse(SrcReg, 0, AMDGPU::sub1);
  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(DstReg)
    .addUse(TmpReg0)
    .addImm(AMDGPU::sub0)
    .addUse(TmpReg1)
    .addImm(AMDGPU::sub1);

  return constrainGenericRegister(SrcReg, AMDGPU::SReg_64RegClass, MRI) &&
         constrainGenericRegister(DstReg, AMDGPU::VReg_64RegClass, MRI);
}

/// Utility function for pushing dynamic vector indexes with a constant
/// offset into waterfall loops.
static void reinsertVectorIndexAdd(MachineIRBuilder &B,
                                   MachineInstr &IdxUseInstr,
                                   unsigned OpIdx,
                                   unsigned ConstOffset) {
  MachineRegisterInfo &MRI = *B.getMRI();
  const LLT S32 = LLT::scalar(32);
  Register WaterfallIdx = IdxUseInstr.getOperand(OpIdx).getReg();
  B.setInsertPt(*IdxUseInstr.getParent(), IdxUseInstr.getIterator());

  auto MaterializedOffset = B.buildConstant(S32, ConstOffset);

  auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
  MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
  MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
  IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
}

/// Implement extending a 32-bit value to a 64-bit value. \p Lo32Reg is the
/// original 32-bit value, and \p Hi32Reg receives the high half of the
/// extension.
static void extendLow32IntoHigh32(MachineIRBuilder &B,
                                  Register Hi32Reg, Register Lo32Reg,
                                  unsigned ExtOpc,
                                  const RegisterBank &RegBank,
                                  bool IsBooleanSrc = false) {
  if (ExtOpc == AMDGPU::G_ZEXT) {
    B.buildConstant(Hi32Reg, 0);
  } else if (ExtOpc == AMDGPU::G_SEXT) {
    if (IsBooleanSrc) {
      // If we know the original source was an s1, the high half is the same
      // as the low.
      B.buildCopy(Hi32Reg, Lo32Reg);
    } else {
      // Replicate sign bit from 32-bit extended part.
      auto ShiftAmt = B.buildConstant(LLT::scalar(32), 31);
      B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
      B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
    }
  } else {
    assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
    B.buildUndef(Hi32Reg);
  }
}

bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
    MachineIRBuilder &B, MachineInstr &MI,
    const OperandsMapper &OpdMapper) const {
  MachineRegisterInfo &MRI = *B.getMRI();

  Register VecReg = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();

  const RegisterBank &IdxBank =
    *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);
  unsigned EltSize = VecTy.getScalarSizeInBits();
  unsigned NumElem = VecTy.getNumElements();

  if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
                                                  IsDivergentIdx, &Subtarget))
    return false;

  const LLT S32 = LLT::scalar(32);

  const RegisterBank &DstBank =
    *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  const RegisterBank &SrcBank =
    *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

  const RegisterBank &CCBank =
    (DstBank == AMDGPU::SGPRRegBank &&
     SrcBank == AMDGPU::SGPRRegBank &&
     IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                     : AMDGPU::VCCRegBank;
  LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);

  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }

  LLT EltTy = VecTy.getScalarType();
  SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
  unsigned NumLanes = DstRegs.size();
  if (!NumLanes)
    NumLanes = 1;
  else
    EltTy = MRI.getType(DstRegs[0]);

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
  SmallVector<Register, 2> Res(NumLanes);
  for (unsigned L = 0; L < NumLanes; ++L)
    Res[L] = UnmergeToEltTy.getReg(L);

  for (unsigned I = 1; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
    auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {
      auto S = B.buildSelect(EltTy, Cmp,
                             UnmergeToEltTy.getReg(I * NumLanes + L), Res[L]);

      for (unsigned N : { 0, 2, 3 })
        MRI.setRegBank(S->getOperand(N).getReg(), DstBank);

      Res[L] = S->getOperand(0).getReg();
    }
  }

  for (unsigned L = 0; L < NumLanes; ++L) {
    Register DstReg = (NumLanes == 1) ? MI.getOperand(0).getReg() : DstRegs[L];
    B.buildCopy(DstReg, Res[L]);
    MRI.setRegBank(DstReg, DstBank);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();

  return true;
}

// Insert a cross-bank copy for \p Reg if it already has a bank that differs
// from \p Bank; otherwise just assign the bank.
static Register constrainRegToBank(MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B, Register &Reg,
                                   const RegisterBank &Bank) {
  const RegisterBank *CurrBank = MRI.getRegBankOrNull(Reg);
  if (CurrBank && *CurrBank != Bank) {
    Register Copy = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
    MRI.setRegBank(Copy, Bank);
    return Copy;
  }

  MRI.setRegBank(Reg, Bank);
  return Reg;
}

bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
    MachineIRBuilder &B, MachineInstr &MI,
    const OperandsMapper &OpdMapper) const {

  MachineRegisterInfo &MRI = *B.getMRI();
  Register VecReg = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(3).getReg();

  const RegisterBank &IdxBank =
    *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);
  unsigned EltSize = VecTy.getScalarSizeInBits();
  unsigned NumElem = VecTy.getNumElements();

  if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
                                                  IsDivergentIdx, &Subtarget))
    return false;

  const LLT S32 = LLT::scalar(32);

  const RegisterBank &DstBank =
    *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  const RegisterBank &SrcBank =
    *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
  const RegisterBank &InsBank =
    *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

  const RegisterBank &CCBank =
    (DstBank == AMDGPU::SGPRRegBank &&
     SrcBank == AMDGPU::SGPRRegBank &&
     InsBank == AMDGPU::SGPRRegBank &&
     IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                     : AMDGPU::VCCRegBank;
  LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);

  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }

  LLT EltTy = VecTy.getScalarType();
  SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
  unsigned NumLanes = InsRegs.size();
  if (!NumLanes) {
    NumLanes = 1;
    InsRegs.push_back(MI.getOperand(2).getReg());
  } else {
    EltTy = MRI.getType(InsRegs[0]);
  }

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
  SmallVector<Register, 16> Ops(NumElem * NumLanes);

  for (unsigned I = 0; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
    auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {
      Register Op0 = constrainRegToBank(MRI, B, InsRegs[L], DstBank);
      Register Op1 = UnmergeToEltTy.getReg(I * NumLanes + L);
      Op1 = constrainRegToBank(MRI, B, Op1, DstBank);

      Register Select = B.buildSelect(EltTy, Cmp, Op0, Op1).getReg(0);
      MRI.setRegBank(Select, DstBank);

      Ops[I * NumLanes + L] = Select;
    }
  }

  LLT MergeTy = LLT::fixed_vector(Ops.size(), EltTy);
  if (MergeTy == MRI.getType(MI.getOperand(0).getReg())) {
    B.buildBuildVector(MI.getOperand(0), Ops);
  } else {
    auto Vec = B.buildBuildVector(MergeTy, Ops);
    MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
    B.buildBitcast(MI.getOperand(0).getReg(), Vec);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();

  return true;
}

// Break s_mul_u64 into 32-bit vector operations.
void AMDGPURegisterBankInfo::applyMappingSMULU64(
    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
  SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
  SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

  // All inputs are SGPRs, nothing special to do.
  if (DefRegs.empty()) {
    assert(Src0Regs.empty() && Src1Regs.empty());
    applyDefaultMapping(OpdMapper);
    return;
  }

  assert(DefRegs.size() == 2);
  assert(Src0Regs.size() == Src1Regs.size() &&
         (Src0Regs.empty() || Src0Regs.size() == 2));

  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  MachineInstr &MI = OpdMapper.getMI();
  Register DstReg = MI.getOperand(0).getReg();
  const LLT HalfTy = LLT::scalar(32);

  // Depending on where the source registers came from, the generic code may
  // have decided to split the inputs already or not. If not, we still need to
  // extract the values.

  if (Src0Regs.empty())
    split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
  else
    setRegsToType(MRI, Src0Regs, HalfTy);

  if (Src1Regs.empty())
    split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
  else
    setRegsToType(MRI, Src1Regs, HalfTy);

  setRegsToType(MRI, DefRegs, HalfTy);

  // The multiplication is done as follows:
  //
  //                            Op1H  Op1L
  //                          * Op0H  Op0L
  //                       --------------------
  //                       Op1H*Op0L  Op1L*Op0L
  //          + Op1H*Op0H  Op1L*Op0H
  // -----------------------------------------
  // (Op1H*Op0L + Op1L*Op0H + carry) Op1L*Op0L
  //
  // We drop Op1H*Op0H because the result of the multiplication is a 64-bit
  // value and that would overflow.
  // The low 32-bit value is Op1L*Op0L.
  // The high 32-bit value is Op1H*Op0L + Op1L*Op0H + carry (from Op1L*Op0L).

  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

  Register Hi = B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
  Register MulLoHi = B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
  Register Add = B.buildAdd(HalfTy, Hi, MulLoHi).getReg(0);
  Register MulHiLo = B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
  B.buildAdd(DefRegs[1], Add, MulHiLo);
  B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);

  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
  MI.eraseFromParent();
}
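
// This is the standard schoolbook decomposition of a 64 x 64 -> 64 bit
// multiply from 32-bit pieces:
//   lo = mul(a.lo, b.lo)
//   hi = umulh(a.lo, b.lo) + mul(a.lo, b.hi) + mul(a.hi, b.lo)
// The a.hi * b.hi term is dropped since it only affects bits above 64.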

void AMDGPURegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &B, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  B.setInstrAndDebugLoc(MI);
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(1))
      break;

    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::VCCRegBank)
      break;
    SmallVector<Register, 1> DefRegs(OpdMapper.getVRegs(0));
    if (DefRegs.empty())
      DefRegs.push_back(DstReg);

    B.setInsertPt(*MI.getParent(), ++MI.getIterator());

    Register NewDstReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    LLVMContext &Ctx = B.getMF().getFunction().getContext();

    MI.getOperand(0).setReg(NewDstReg);
    if (Opc != AMDGPU::G_IMPLICIT_DEF) {
      uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
      MI.getOperand(1).setCImm(
          ConstantInt::get(IntegerType::getInt32Ty(Ctx), ConstVal));
    }

    MRI.setRegBank(NewDstReg, *DstBank);
    B.buildTrunc(DefRegs[0], NewDstReg);
    return;
  }
  case AMDGPU::G_PHI: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(1))
      break;

    const LLT S32 = LLT::scalar(32);
    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::VCCRegBank) {
      applyDefaultMapping(OpdMapper);
      // The standard handling only considers the result register bank for
      // phis. For VCC, incoming values that are not already lane masks need a
      // copy to VCC emitted in the incoming block, before its terminators.
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        Register SrcReg = MI.getOperand(I).getReg();
        const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);

        if (SrcBank != &AMDGPU::VCCRegBank) {
          MachineBasicBlock *SrcMBB = MI.getOperand(I + 1).getMBB();
          B.setInsertPt(*SrcMBB, SrcMBB->getFirstTerminator());

          auto Copy = B.buildCopy(LLT::scalar(1), SrcReg);
          MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
          MI.getOperand(I).setReg(Copy.getReg(0));
        }
      }

      return;
    }

    // An s1 phi on a scalar bank is promoted to s32 to avoid selecting copies
    // to and from a condition register.
    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
    B.setInsertPt(B.getMBB(), MI);
    LegalizerHelper Helper(B.getMF(), ApplyBank, B);

    if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
      llvm_unreachable("widen scalar should have succeeded");

    return;
  }
  case AMDGPU::G_FCMP:
    if (!Subtarget.hasSALUFloatInsts())
      break;
    [[fallthrough]];
  case AMDGPU::G_ICMP:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE: {
    unsigned BoolDstOp =
        (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
    Register DstReg = MI.getOperand(BoolDstOp).getReg();

    const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank != &AMDGPU::SGPRRegBank)
      break;

    const bool HasCarryIn = MI.getNumOperands() == 5;

    // If this is a scalar compare, promote the result to s32, as the
    // selection will end up using a copy to a 32-bit vreg.
    const LLT S32 = LLT::scalar(32);
    Register NewDstReg = MRI.createGenericVirtualRegister(S32);
    MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
    MI.getOperand(BoolDstOp).setReg(NewDstReg);

    if (HasCarryIn) {
      Register NewSrcReg = MRI.createGenericVirtualRegister(S32);
      MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
      B.buildZExt(NewSrcReg, MI.getOperand(4).getReg());
      MI.getOperand(4).setReg(NewSrcReg);
    }

    MachineBasicBlock *MBB = MI.getParent();
    B.setInsertPt(*MBB, std::next(MI.getIterator()));

    // If we had a constrained VCC result register, a copy was inserted to VCC
    // from SGPR.
    SmallVector<Register, 1> DefRegs(OpdMapper.getVRegs(0));
    if (DefRegs.empty())
      DefRegs.push_back(DstReg);
    B.buildTrunc(DefRegs[0], NewDstReg);
    return;
  }
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    SmallVector<Register, 1> CondRegs(OpdMapper.getVRegs(1));
    if (CondRegs.empty())
      CondRegs.push_back(MI.getOperand(1).getReg());
    else {
      assert(CondRegs.size() == 1);
    }

    const RegisterBank *CondBank = getRegBank(CondRegs[0], MRI, *TRI);
    if (CondBank == &AMDGPU::SGPRRegBank) {
      const LLT S32 = LLT::scalar(32);
      Register NewCondReg = MRI.createGenericVirtualRegister(S32);
      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(1).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondRegs[0]);
    }

    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    auto Flags = MI.getFlags();
    B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
    B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_BRCOND: {
    Register CondReg = MI.getOperand(0).getReg();
    // FIXME: Should use legalizer helper, but should change bool ext type.
    const RegisterBank *CondBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;

    if (CondBank == &AMDGPU::SGPRRegBank) {
      const LLT S32 = LLT::scalar(32);
      Register NewCondReg = MRI.createGenericVirtualRegister(S32);
      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(0).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondReg);
      return;
    }

    break;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;

    if (DstTy.getSizeInBits() == 1) {
      if (DstBank == &AMDGPU::VCCRegBank)
        break;

      MachineFunction *MF = MI.getParent()->getParent();
      ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
      LegalizerHelper Helper(*MF, ApplyBank, B);

      if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
          LegalizerHelper::Legalized)
        llvm_unreachable("widen scalar should have succeeded");
      return;
    }

    if (DstTy.getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {
      // 16-bit operations are VALU only; promote to a 32-bit SALU op.
      const LLT S32 = LLT::scalar(32);
      MachineFunction *MF = MI.getParent()->getParent();
      ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);
      LegalizerHelper Helper(*MF, ApplySALU, B);

      // Widen to S32, but special-case `G_XOR x, -1`: widening the constant
      // with an anyext would stop the result from matching as a "not".
      if (MI.getOpcode() == AMDGPU::G_XOR &&
          mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstOrSplat(-1))) {
        Helper.widenScalarSrc(MI, S32, 1, AMDGPU::G_ANYEXT);
        Helper.widenScalarSrc(MI, S32, 2, AMDGPU::G_ANYEXT);
        Helper.widenScalarDst(MI, S32);
      } else {
        if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
          llvm_unreachable("widen scalar should have succeeded");
      }
      return;
    }

    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need
    // to extract the values.

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    auto Flags = MI.getFlags();
    B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
    B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);

    // There is no VALU abs instruction so we need to replace it with a sub
    // and max combination.
    if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
      MachineFunction *MF = MI.getParent()->getParent();
      ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);
      LegalizerHelper Helper(*MF, Apply, B);

      if (Helper.lowerAbsToMaxNeg(MI) != LegalizerHelper::Legalized)
        llvm_unreachable("lowerAbsToMaxNeg should have succeeded");
      return;
    }
    [[fallthrough]];
  }
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL:
  case AMDGPU::G_SHL:
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // Special case for s_mul_u64. There is no vector equivalent of
    // s_mul_u64, so we have to break it down into 32-bit vector
    // multiplications.
    if (!Subtarget.hasVectorMulU64() && Opc == AMDGPU::G_MUL &&
        DstTy.getSizeInBits() == 64) {
      applyMappingSMULU64(B, OpdMapper);
      return;
    }

    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
    // Packed 16-bit operations need to be scalarized and promoted.
    if (DstTy != LLT::scalar(16) && DstTy != LLT::fixed_vector(2, 16))
      break;

    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    const LLT S32 = LLT::scalar(32);
    MachineBasicBlock *MBB = MI.getParent();
    MachineFunction *MF = MBB->getParent();
    ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);

    if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {
      Register WideSrcLo, WideSrcHi;

      std::tie(WideSrcLo, WideSrcHi) =
          unpackV2S16ToS32(B, MI.getOperand(1).getReg(), TargetOpcode::G_SEXT);
      auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
      auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();
      return;
    }

    if (DstTy.isVector()) {
      Register WideSrc0Lo, WideSrc0Hi;
      Register WideSrc1Lo, WideSrc1Hi;

      unsigned ExtendOp = getExtendOp(MI.getOpcode());
      std::tie(WideSrc0Lo, WideSrc0Hi)
        = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtendOp);
      std::tie(WideSrc1Lo, WideSrc1Hi)
        = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtendOp);
      auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
      auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();
    } else {
      LegalizerHelper Helper(*MF, ApplySALU, B);

      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widen scalar should have succeeded");

      // FIXME: s16 shift amounts should be legal.
      if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
          Opc == AMDGPU::G_ASHR) {
        B.setInsertPt(*MBB, MI.getIterator());
        if (Helper.widenScalar(MI, 1, S32) != LegalizerHelper::Legalized)
          llvm_unreachable("widen scalar should have succeeded");
      }
    }

    return;
  }
  case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
  case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
    // This is a special case for s_mul_u64. We use
    // G_AMDGPU_S_MUL_I64_I32 opcode to represent an s_mul_u64 operation
    // where the 33 higher bits are sign-extended and
    // G_AMDGPU_S_MUL_U64_U32 opcode to represent an s_mul_u64 operation
    // where the 32 higher bits are zero-extended. In case scalar registers
    // are selected, both opcodes are lowered as s_mul_u64. If the vector
    // registers are selected, then G_AMDGPU_S_MUL_I64_I32 and
    // G_AMDGPU_S_MUL_U64_U32 are lowered with a vector mad instruction.

    // Insert basic copies.
    applyDefaultMapping(OpdMapper);

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg0 = MI.getOperand(1).getReg();
    Register SrcReg1 = MI.getOperand(2).getReg();
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);
    assert(MRI.getType(DstReg) == S64 && "This is a special case for s_mul_u64 "
                                         "that handles only 64-bit operands.");
    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;

    // Replace G_AMDGPU_S_MUL_I64_I32 and G_AMDGPU_S_MUL_U64_U32 with the
    // s_mul_u64 operation.
    if (DstBank == &AMDGPU::SGPRRegBank) {
      MI.setDesc(TII->get(AMDGPU::S_MUL_U64));
      MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
      return;
    }

    // Replace G_AMDGPU_S_MUL_I64_I32 and G_AMDGPU_S_MUL_U64_U32 with a vector
    // mad.
    assert(MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
           "The destination operand should be in vector registers.");

    // Extract the lower subregister from the first operand.
    Register Op0L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
    MRI.setType(Op0L, S32);
    B.buildTrunc(Op0L, SrcReg0);

    // Extract the lower subregister from the second operand.
    Register Op1L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
    MRI.setType(Op1L, S32);
    B.buildTrunc(Op1L, SrcReg1);

    unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
                          ? AMDGPU::G_AMDGPU_MAD_U64_U32
                          : AMDGPU::G_AMDGPU_MAD_I64_I32;

    Register Zero64 = B.buildConstant(S64, 0).getReg(0);
    MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
    Register CarryOut = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
    B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_SEXT_INREG: {
    SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
    if (SrcRegs.empty())
      break; // Nothing to repair.

    const LLT S32 = LLT::scalar(32);
    ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);

    // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
    // we would need to further expand, and doesn't let us directly set the
    // result registers.
    SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));

    int Amt = MI.getOperand(2).getImm();
    if (Amt <= 32) {
      // The low bits of the result come entirely from the low half of the
      // source; the high half is a sign-propagating shift.
      if (Amt == 32) {
        // The low bits are unchanged.
        B.buildFreeze(DstRegs[0], SrcRegs[0]);
      } else {
        auto Freeze = B.buildFreeze(S32, SrcRegs[0]);
        // Extend in the low bits.
        B.buildSExtInReg(DstRegs[0], Freeze, Amt);
      }

      B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
    } else {
      // The low bits are unchanged, and we only extend in the high bits.
      B.buildCopy(DstRegs[0], SrcRegs[0]);
      B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
    }

    Register DstReg = MI.getOperand(0).getReg();
    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_CTPOP:
  case AMDGPU::G_BITREVERSE: {
    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::SGPRRegBank)
      break;

    Register SrcReg = MI.getOperand(1).getReg();
    const LLT S32 = LLT::scalar(32);
    LLT Ty = MRI.getType(SrcReg);
    if (Ty == S32)
      break;

    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
    LegalizerHelper Helper(B.getMF(), ApplyVALU, B);

    if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
      llvm_unreachable("narrowScalar should have succeeded");
    return;
  }
  case AMDGPU::G_AMDGPU_FFBH_U32:
  case AMDGPU::G_AMDGPU_FFBL_B32:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ_ZERO_UNDEF: {
    const RegisterBank *DstBank =
        OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
    if (DstBank == &AMDGPU::SGPRRegBank)
      break;

    Register SrcReg = MI.getOperand(1).getReg();
    const LLT S32 = LLT::scalar(32);
    LLT Ty = MRI.getType(SrcReg);
    if (Ty == S32)
      break;

    // We can narrow this more efficiently than Helper can by using ffbh/ffbl
    // which return -1 when the input is zero:
    // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
    // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
    // (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
    // (ffbl hi:lo) -> (umin (uaddsat (ffbl hi), 32), (ffbl lo))
    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
    SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
    unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
                          ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
                          : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
                                ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32
                                : Opc;
    unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
    auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
    auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});
    unsigned AddOpc =
        Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
            ? AMDGPU::G_ADD
            : AMDGPU::G_UADDSAT;
    Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});
    Register DstReg = MI.getOperand(0).getReg();
    B.buildUMin(DstReg, X, Y);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_ANYEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    const bool Signed = Opc == AMDGPU::G_SEXT;

    assert(OpdMapper.getVRegs(1).empty());

    const RegisterBank *SrcBank =
        OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // FIXME: Should handle any type that round to s64 when irregular
        // breakdowns supported.
        DstTy.getSizeInBits() == 64 &&
        SrcTy.getSizeInBits() <= 32) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      // Extend to 32-bit, and then extend the low half.
      if (Signed) {
        // TODO: Should really be buildSExtOrCopy
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
      } else if (Opc == AMDGPU::G_ZEXT) {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
      } else {
        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
      }

      extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank);
      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }

    if (SrcTy != LLT::scalar(1))
      return;

    // It is not legal to have a legalization artifact with a VCC source.
    // Rather than introducing a copy, insert the select we would have to
    // select the copy to.
    if (SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();

      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SGPRRegBankID;

      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank, true);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    break;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));

    assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();

    const LLT S32 = LLT::scalar(32);
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    if (foldExtractEltToCmpSelect(B, MI, OpdMapper))
      return;

    const ValueMapping &DstMapping
      = OpdMapper.getInstrMapping().getOperandMapping(0);
    const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
    const RegisterBank *SrcBank =
      OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
    const RegisterBank *IdxBank =
      OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

    Register BaseIdxReg;
    unsigned ConstOffset;
    std::tie(BaseIdxReg, ConstOffset) =
        AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());

    // See if the index is an add of a constant which will be foldable by
    // moving the base register of the index later if this is going to be
    // executed in a waterfall loop. This is essentially to reassociate the
    // add of a constant with the readfirstlane.
    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
                                   ConstOffset > 0 &&
                                   ConstOffset < SrcTy.getNumElements();

    // Move the base register. We'll re-insert the add later.
    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(2).setReg(BaseIdxReg);

    // If this is a VGPR result only because the index was a VGPR result, the
    // actual indexing will be done on the SGPR source vector, which will
    // produce a scalar result. We need to copy to the VGPR result inside the
    // waterfall loop.
    const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
                                SrcBank == &AMDGPU::SGPRRegBank;
    if (DstRegs.empty()) {
      applyDefaultMapping(OpdMapper);

      executeInWaterfallLoop(B, MI, {2});

      if (NeedCopyToVGPR) {
        // We don't want a phi for this temporary reg.
        Register TmpReg = MRI.createGenericVirtualRegister(DstTy);
        MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
        MI.getOperand(0).setReg(TmpReg);
        B.setInsertPt(*MI.getParent(), ++MI.getIterator());

        // Use a v_mov_b32 here to make the exec dependency explicit.
        buildVCopy(B, DstReg, TmpReg);
      }

      // Re-insert the constant offset add inside the waterfall loop.
      if (ShouldMoveIndexIntoLoop)
        reinsertVectorIndexAdd(B, MI, 2, ConstOffset);

      return;
    }

    assert(DstTy.getSizeInBits() == 64);

    LLT Vec32 = LLT::fixed_vector(2 * SrcTy.getNumElements(), 32);

    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);

    MachineBasicBlock::iterator MII = MI.getIterator();

    // Split the vector index into 32-bit pieces. Prepare to move all of the
    // new instructions into a waterfall loop if necessary.
    //
    // Don't put the bitcast or constant in the loop.
    MachineInstrSpan Span(MII, &B.getMBB());

    // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
    auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);

    MRI.setRegBank(DstReg, *DstBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

    SmallSet<Register, 4> OpsToWaterfall;
    if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, {2})) {
      MI.eraseFromParent();
      return;
    }

    // Remove the original instruction to avoid potentially confusing the
    // waterfall loop logic.
    B.setInstr(*Span.begin());
    MI.eraseFromParent();
    executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
                           OpsToWaterfall);

    if (NeedCopyToVGPR) {
      MachineBasicBlock *LoopBB = Extract1->getParent();
      Register TmpReg0 = MRI.createGenericVirtualRegister(S32);
      Register TmpReg1 = MRI.createGenericVirtualRegister(S32);
      MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
      MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);

      Extract0->getOperand(0).setReg(TmpReg0);
      Extract1->getOperand(0).setReg(TmpReg1);

      B.setInsertPt(*LoopBB, ++Extract1->getIterator());

      buildVCopy(B, DstRegs[0], TmpReg0);
      buildVCopy(B, DstRegs[1], TmpReg1);
    }

    if (ShouldMoveIndexIntoLoop)
      reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);

    return;
  }
2959 case AMDGPU::G_INSERT_VECTOR_ELT: {
2961
2962 Register DstReg = MI.getOperand(0).getReg();
2963 LLT VecTy = MRI.getType(DstReg);
2964
2967
2969 MRI.setType(MI.getOperand(1).getReg(), VecTy);
2970
2971 if (foldInsertEltToCmpSelect(B, MI, OpdMapper))
2972 return;
2973
2976
2977 Register SrcReg = MI.getOperand(1).getReg();
2978 Register InsReg = MI.getOperand(2).getReg();
2979 LLT InsTy = MRI.getType(InsReg);
2980 (void)InsTy;
2981
2983 unsigned ConstOffset;
2984 std::tie(BaseIdxReg, ConstOffset) =
2986
2987
2988
2989
2990
2991 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2992 ConstOffset > 0 &&
2994
2995
2996 if (ShouldMoveIndexIntoLoop)
2997 MI.getOperand(3).setReg(BaseIdxReg);
2998
2999
3000 if (InsRegs.empty()) {
3002
3003
3004 if (ShouldMoveIndexIntoLoop) {
3006 }
3007
3008 return;
3009 }
3010
3012
3015
3016 auto CastSrc = B.buildBitcast(Vec32, SrcReg);
3017 auto One = B.buildConstant(S32, 1);
3018
3019
3020
3021
3022
3024
3025
3026 auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
3027 auto IdxHi = B.buildAdd(S32, IdxLo, One);
3028
3029 auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
3030 auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3031
3038
3039 MRI.setRegBank(InsReg, *InsSrcBank);
3040 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3041 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3042 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3043 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3044 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3045 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3046
3047
3050 B.setInsertPt(B.getMBB(), MI);
3051 B.buildBitcast(DstReg, InsHi);
3052 MI.eraseFromParent();
3053 return;
3054 }
3055
3056 B.setInstr(*Span.begin());
3057 MI.eraseFromParent();
3058
3059
3060
3061 executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
3062 OpsToWaterfall);
3063
3064
3065
3066
3067
3068 B.buildBitcast(DstReg, InsHi);
3069
3070
3071 if (ShouldMoveIndexIntoLoop)
3072 reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);
3073
3074 return;
3075 }
3076 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3077 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3078 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3079 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3080 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3081 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3082 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3083 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3084 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3085 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3086 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3087 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3088 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3089 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3090 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3091 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3092 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3093 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3094 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3095 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3096 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3097 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3098 applyDefaultMapping(OpdMapper);
3099 executeInWaterfallLoop(B, MI, {1, 4});
3100 return;
3101 }
3102 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3103 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3104 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3105 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3106 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3107 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3108 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3109 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3110 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3111 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3112 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3113 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
3114 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:
3115 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:
3116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3117 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3118 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3119 applyDefaultMapping(OpdMapper);
3120 executeInWaterfallLoop(B, MI, {2, 5});
3121 return;
3122 }
3123 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3124 applyDefaultMapping(OpdMapper);
3125 executeInWaterfallLoop(B, MI, {3, 6});
3126 return;
3127 }
3128 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3129 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3130 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3131 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3132 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3133 applyMappingSBufferLoad(B, OpdMapper);
3134 return;
3135 }
3136 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
3137 constrainOpWithReadfirstlane(B, MI, 0);
3138 constrainOpWithReadfirstlane(B, MI, 2);
3139 return;
3140 case AMDGPU::G_INTRINSIC:
3141 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3142 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
3143 case Intrinsic::amdgcn_readlane: {
3145
3148
3149
3150
3151 constrainOpWithReadfirstlane(B, MI, 3);
3152 return;
3153 }
3154 case Intrinsic::amdgcn_writelane: {
3158
3160 constrainOpWithReadfirstlane(B, MI, 2);
3161 constrainOpWithReadfirstlane(B, MI, 3);
3162 return;
3163 }
3164 case Intrinsic::amdgcn_interp_p1:
3165 case Intrinsic::amdgcn_interp_p2:
3166 case Intrinsic::amdgcn_interp_mov:
3167 case Intrinsic::amdgcn_interp_p1_f16:
3168 case Intrinsic::amdgcn_interp_p2_f16:
3169 case Intrinsic::amdgcn_lds_param_load: {
3171
3172
3173
3175 return;
3176 }
3177 case Intrinsic::amdgcn_interp_inreg_p10:
3178 case Intrinsic::amdgcn_interp_inreg_p2:
3179 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3180 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3181 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3182 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3183 case Intrinsic::amdgcn_permlane16_swap:
3184 case Intrinsic::amdgcn_permlane32_swap:
3186 return;
3187 case Intrinsic::amdgcn_permlane16:
3188 case Intrinsic::amdgcn_permlanex16: {
3189
3190 substituteSimpleCopyRegs(OpdMapper, 2);
3191 substituteSimpleCopyRegs(OpdMapper, 3);
3192 constrainOpWithReadfirstlane(B, MI, 4);
3193 constrainOpWithReadfirstlane(B, MI, 5);
3194 return;
3195 }
3196 case Intrinsic::amdgcn_permlane_bcast:
3197 case Intrinsic::amdgcn_permlane_up:
3198 case Intrinsic::amdgcn_permlane_down:
3199 case Intrinsic::amdgcn_permlane_xor:
3200
3203 return;
3204 case Intrinsic::amdgcn_permlane_idx_gen: {
3206 return;
3207 }
3208 case Intrinsic::amdgcn_sbfe:
3209 applyMappingBFE(B, OpdMapper, true);
3210 return;
3211 case Intrinsic::amdgcn_ubfe:
3212 applyMappingBFE(B, OpdMapper, false);
3213 return;
3214 case Intrinsic::amdgcn_inverse_ballot:
3215 case Intrinsic::amdgcn_s_bitreplicate:
3216 case Intrinsic::amdgcn_s_quadmask:
3217 case Intrinsic::amdgcn_s_wqm:
3218 applyDefaultMapping(OpdMapper);
3219 constrainOpWithReadfirstlane(B, MI, 2);
3220 return;
3221 case Intrinsic::amdgcn_ballot:
3222
3223 break;
3224 }
3225 break;
3226 }
3227 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3228 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3229 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3230 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3231 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3235
3236
3237
3239 return;
3240 }
3241 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3242 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
3243 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3244 bool IsDualOrBVH8 =
3245 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3246 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3247 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
3248 unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
3251 return;
3252 }
3253 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3254 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3255 auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
3256 switch (IntrID) {
3257 case Intrinsic::amdgcn_ds_ordered_add:
3258 case Intrinsic::amdgcn_ds_ordered_swap: {
3259
3263 return;
3264 }
3265 case Intrinsic::amdgcn_ds_gws_init:
3266 case Intrinsic::amdgcn_ds_gws_barrier:
3267 case Intrinsic::amdgcn_ds_gws_sema_br: {
3268
3271 return;
3272 }
3273 case Intrinsic::amdgcn_ds_gws_sema_v:
3274 case Intrinsic::amdgcn_ds_gws_sema_p:
3275 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3276
3278 return;
3279 }
3280 case Intrinsic::amdgcn_ds_append:
3281 case Intrinsic::amdgcn_ds_consume: {
3283 return;
3284 }
3285 case Intrinsic::amdgcn_s_sendmsg:
3286 case Intrinsic::amdgcn_s_sendmsghalt: {
3287
3289 return;
3290 }
3291 case Intrinsic::amdgcn_s_setreg: {
3293 return;
3294 }
3295 case Intrinsic::amdgcn_s_ttracedata:
3297 return;
3298 case Intrinsic::amdgcn_raw_buffer_load_lds:
3299 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3304 return;
3305 }
3306 case Intrinsic::amdgcn_struct_buffer_load_lds:
3307 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3312 return;
3313 }
3314 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
3315 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
3316 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
3317 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
3320 return;
3321 }
3322 case Intrinsic::amdgcn_load_to_lds:
3323 case Intrinsic::amdgcn_global_load_lds: {
3326 return;
3327 }
3328 case Intrinsic::amdgcn_lds_direct_load: {
3330
3332 return;
3333 }
3334 case Intrinsic::amdgcn_exp_row:
3337 return;
3338 case Intrinsic::amdgcn_cluster_load_b32:
3339 case Intrinsic::amdgcn_cluster_load_b64:
3340 case Intrinsic::amdgcn_cluster_load_b128: {
3343 return;
3344 }
3345 case Intrinsic::amdgcn_s_sleep_var:
3348 return;
3349 case Intrinsic::amdgcn_s_barrier_join:
3350 case Intrinsic::amdgcn_s_wakeup_barrier:
3352 return;
3353 case Intrinsic::amdgcn_s_barrier_init:
3354 case Intrinsic::amdgcn_s_barrier_signal_var:
3357 return;
3358 case Intrinsic::amdgcn_s_get_barrier_state:
3359 case Intrinsic::amdgcn_s_get_named_barrier_state: {
3361 return;
3362 }
3363 case Intrinsic::amdgcn_s_prefetch_data: {
3364 Register PtrReg = MI.getOperand(1).getReg();
3365 unsigned AS = MRI.getType(PtrReg).getAddressSpace();
3369 } else
3370 MI.eraseFromParent();
3371 return;
3372 }
3373 case Intrinsic::amdgcn_tensor_load_to_lds:
3374 case Intrinsic::amdgcn_tensor_store_from_lds: {
3379 return;
3380 }
3381 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
3382 case Intrinsic::amdgcn_tensor_store_from_lds_d2: {
3385 return;
3386 }
3387 default: {
3388 if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
3389 AMDGPU::lookupRsrcIntrinsic(IntrID)) {
3390
3391
3392
3393 if (RSrcIntrin->IsImage) {
3394 applyMappingImage(B, MI, OpdMapper, RSrcIntrin->RsrcArg);
3395 return;
3396 }
3397 }
3398
3399 break;
3400 }
3401 }
3402 break;
3403 }
3404 case AMDGPU::G_SI_CALL: {
3405
3406
3407 SmallSet<Register, 4> SGPROperandRegs;
3408
3409 if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, {1}))
3410 break;
3411
3412
3413
3414
3415 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3416 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3417
3418
3419
3420 SmallVector<MachineInstr *, 4> NonCopyInstrs;
3421
3422 unsigned NonCopyInstrsLen = 0;
3423 MachineBasicBlock::iterator Start(&MI);
3424 MachineBasicBlock::iterator LastCopy = Start;
3425 MachineBasicBlock *MBB = MI.getParent();
3426 const SIMachineFunctionInfo *Info =
3427 MBB->getParent()->getInfo<SIMachineFunctionInfo>();
3428 while (Start->getOpcode() != FrameSetupOpcode) {
3429 --Start;
3430 bool IsCopy = false;
3431 if (Start->getOpcode() == AMDGPU::COPY) {
3432 auto &Dst = Start->getOperand(0);
3433 if (Dst.isReg()) {
3434 Register Reg = Dst.getReg();
3435 if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {
3436 IsCopy = true;
3437 } else {
3438
3439
3440 auto &Src = Start->getOperand(1);
3441 if (Src.isReg()) {
3442 Reg = Src.getReg();
3443 IsCopy = Info->getScratchRSrcReg() == Reg;
3444 }
3445 }
3446 }
3447 }
3448
3449 if (IsCopy) {
3450 LastCopy = Start;
3451 NonCopyInstrsLen = NonCopyInstrs.size();
3452 } else {
3453 NonCopyInstrs.push_back(&*Start);
3454 }
3455 }
3456 NonCopyInstrs.resize(NonCopyInstrsLen);
3457
3458 for (auto *NonCopy : reverse(NonCopyInstrs)) {
3459 MBB->splice(LastCopy, MBB, NonCopy->getIterator());
3460 }
3461 Start = LastCopy;
3462
3463
3464 NonCopyInstrs.clear();
3465 NonCopyInstrsLen = 0;
3466 MachineBasicBlock::iterator End(&MI);
3467 LastCopy = End;
3468 while (End->getOpcode() != FrameDestroyOpcode) {
3469 ++End;
3470 bool IsCopy = false;
3471 if (End->getOpcode() == AMDGPU::COPY) {
3472 auto &Src = End->getOperand(1);
3473 if (Src.isReg()) {
3474 Register Reg = Src.getReg();
3475 IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);
3476 }
3477 }
3478
3479 if (IsCopy) {
3480 LastCopy = End;
3481 NonCopyInstrsLen = NonCopyInstrs.size();
3482 } else {
3483 NonCopyInstrs.push_back(&*End);
3484 }
3485 }
3486 NonCopyInstrs.resize(NonCopyInstrsLen);
3487
3488 End = LastCopy;
3489 ++LastCopy;
3490 for (auto *NonCopy : reverse(NonCopyInstrs)) {
3491 MBB->splice(LastCopy, MBB, NonCopy->getIterator());
3492 }
3493
3494 ++End;
3495 B.setInsertPt(B.getMBB(), Start);
3496 executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
3497 break;
3498 }
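 // Summary of the splice dance above: everything between ADJCALLSTACKUP and
 // ADJCALLSTACKDOWN is partitioned so the argument/result copies form one
 // contiguous [Start, End) range around the call, with unrelated
 // instructions moved out of the way first. That contiguous range is what
 // gets wrapped in the waterfall loop for a divergent callee.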
3499 case AMDGPU::G_LOAD:
3500 case AMDGPU::G_ZEXTLOAD:
3501 case AMDGPU::G_SEXTLOAD: {
3502 if (applyMappingLoad(B, OpdMapper, MI))
3503 return;
3504 break;
3505 }
3506 case AMDGPU::G_DYN_STACKALLOC:
3507 applyMappingDynStackAlloc(B, OpdMapper, MI);
3508 return;
3509 case AMDGPU::G_STACKRESTORE: {
3510 applyDefaultMapping(OpdMapper);
3511 constrainOpWithReadfirstlane(B, MI, 0);
3512 return;
3513 }
3514 case AMDGPU::G_SBFX:
3515 applyMappingBFE(B, OpdMapper, true);
3516 return;
3517 case AMDGPU::G_UBFX:
3518 applyMappingBFE(B, OpdMapper, false);
3519 return;
3520 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3521 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3522 applyMappingMAD_64_32(B, OpdMapper);
3523 return;
3524 case AMDGPU::G_PREFETCH: {
3525 if (!Subtarget.hasSafeSmemPrefetch() && !Subtarget.hasVmemPrefInsts()) {
3526 MI.eraseFromParent();
3527 return;
3528 }
3529 Register PtrReg = MI.getOperand(0).getReg();
3530 unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
3531 if (PtrBank == AMDGPU::VGPRRegBankID &&
3532 (!Subtarget.hasVmemPrefInsts() || !MI.getOperand(3).getImm())) {
3533
3534 MI.eraseFromParent();
3535 return;
3536 }
3537 unsigned AS = MRI.getType(PtrReg).getAddressSpace();
3540 (!Subtarget.hasSafeSmemPrefetch() &&
3541 (AS == AMDGPUAS::LOCAL_ADDRESS ||
3542 !MI.getOperand(3).getImm()))) {
3543 MI.eraseFromParent();
3544 return;
3545 }
3546 applyDefaultMapping(OpdMapper);
3547 return;
3548 }
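 // Note on the prefetch cases above: a prefetch is only a performance hint,
 // so whenever no safe scalar or vector prefetch instruction exists for the
 // pointer's bank and address space, simply deleting the instruction is a
 // correct lowering.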
3549 default:
3550 break;
3551 }
3552
3553 return applyDefaultMapping(OpdMapper);
3554}
3555
3556
3557
3558
3559
3560static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
3561 if (RB0 == AMDGPU::InvalidRegBankID)
3562 return RB1;
3563 if (RB1 == AMDGPU::InvalidRegBankID)
3564 return RB0;
3565
3566 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3567 return AMDGPU::SGPRRegBankID;
3568
3569 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3570 return AMDGPU::AGPRRegBankID;
3571
3572 return AMDGPU::VGPRRegBankID;
3573}
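// The union above is a small lattice, with InvalidRegBankID acting as the
// identity element: sgpr with sgpr stays sgpr, agpr with agpr stays agpr,
// and any mixed pair (or anything involving vgpr) widens to vgpr.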
3574
3575static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1) {
3576 if (RB0 == AMDGPU::InvalidRegBankID)
3577 return RB1;
3578 if (RB1 == AMDGPU::InvalidRegBankID)
3579 return RB0;
3580
3581
3582
3583
3584 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3585 return AMDGPU::VCCRegBankID;
3586
3587
3588 return regBankUnion(RB0, RB1);
3589}
3590
3591unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
3592 const MachineInstr &MI) const {
3593 unsigned RegBank = AMDGPU::InvalidRegBankID;
3594
3595 for (const MachineOperand &MO : MI.operands()) {
3596 if (!MO.isReg())
3597 continue;
3598 Register Reg = MO.getReg();
3599 if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
3600 RegBank = regBankUnion(RegBank, Bank->getID());
3601 if (RegBank == AMDGPU::VGPRRegBankID)
3602 break;
3603 }
3604 }
3605
3606 return RegBank;
3607}
3608
3609bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
3610 const MachineFunction &MF = *MI.getParent()->getParent();
3611 const MachineRegisterInfo &MRI = MF.getRegInfo();
3612 for (const MachineOperand &MO : MI.operands()) {
3613 if (!MO.isReg())
3614 continue;
3615 Register Reg = MO.getReg();
3616 if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
3617 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3618 return false;
3619 }
3620 }
3621 return true;
3622}
3623
3624const RegisterBankInfo::InstructionMapping &
3625AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
3626 const MachineFunction &MF = *MI.getParent()->getParent();
3627 const MachineRegisterInfo &MRI = MF.getRegInfo();
3628 SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
3629
3630 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3631 const MachineOperand &Src = MI.getOperand(i);
3632 if (!Src.isReg())
3633 continue;
3634
3635 unsigned Size = getSizeInBits(Src.getReg(), MRI, *TRI);
3636 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
3637 }
3638 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3639 MI.getNumOperands());
3640}
3641
3642const RegisterBankInfo::InstructionMapping &
3643AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
3644 const MachineFunction &MF = *MI.getParent()->getParent();
3645 const MachineRegisterInfo &MRI = MF.getRegInfo();
3646 SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
3647
3648
3649
3650
3651
3652 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3653 const MachineOperand &Src = MI.getOperand(i);
3654 if (!Src.isReg())
3655 continue;
3656
3657 unsigned Size = getSizeInBits(Src.getReg(), MRI, *TRI);
3658 unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3659 OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
3660 }
3661
3662 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3663 MI.getNumOperands());
3664}
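// The only special case in the VALU mapping above: 1-bit values are lane
// masks and therefore map to VCC rather than to a 32-bit VGPR lane.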
3665
3666const RegisterBankInfo::InstructionMapping &
3667AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
3668 const MachineFunction &MF = *MI.getParent()->getParent();
3669 const MachineRegisterInfo &MRI = MF.getRegInfo();
3670 SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
3671
3672 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
3673 const MachineOperand &Op = MI.getOperand(I);
3674 if (!Op.isReg())
3675 continue;
3676
3677 unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
3678 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3679 }
3680
3681 return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
3682 MI.getNumOperands());
3683}
3684
3688 int RsrcIdx) const {
3689
3690
3691 RsrcIdx += MI.getNumExplicitDefs() + 1;
3692
3693 const int NumOps = MI.getNumOperands();
3695
3696
3697
3698 for (int I = 0; I != NumOps; ++I) {
3699 if (!MI.getOperand(I).isReg())
3700 continue;
3701
3702 Register OpReg = MI.getOperand(I).getReg();
3703
3704 if (!OpReg)
3705 continue;
3706
3708
3709
3710
3711
3712
3713 const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
3714
3715 if (MustBeSGPR) {
3716
3717 unsigned NewBank = getRegBankID(OpReg, MRI, AMDGPU::SGPRRegBankID);
3718 OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
3719 } else {
3720
3721 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3722 }
3723 }
3724
3726}
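// Note on the loop above: only the resource and (if present) sampler
// descriptor operands, at RsrcIdx and RsrcIdx + 1, must end up in SGPRs;
// reporting them as such here lets the apply step insert a waterfall loop
// when they are actually divergent.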
3727
3728
3732 LLT PtrTy = MRI.getType(PtrReg);
3734 if (Subtarget.useFlatForGlobal() ||
3736 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3737
3738
3739
3741 return AMDGPU::getValueMapping(PtrBank->getID(), Size);
3742}
3743
3746
3751 Register PtrReg = MI.getOperand(1).getReg();
3752 LLT PtrTy = MRI.getType(PtrReg);
3755
3758
3760
3763
3764 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
3765 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3766 } else {
3767 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3768
3769
3770
3771 unsigned PtrBankID = Subtarget.useFlatForGlobal() ?
3772 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3773
3774 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3775 }
3776 } else {
3777 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3778 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3779 }
3780
3781 OpdsMapping[0] = ValMapping;
3782 OpdsMapping[1] = PtrMapping;
3785 return Mapping;
3786
3787
3788
3789}
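// Decision sketch for the load mapping above:
//   uniform pointer, loadable via scalar memory -> SGPR value, SGPR pointer
//   uniform pointer, divergent result           -> VGPR value; the pointer
//     stays SGPR unless the target uses FLAT for global accesses, which
//     need a VGPR address
//   anything else                               -> VGPR value, VGPR pointer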
3790
3791unsigned
3792AMDGPURegisterBankInfo::getRegBankID(Register Reg,
3793 const MachineRegisterInfo &MRI,
3794 unsigned Default) const {
3795 const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
3796 return Bank ? Bank->getID() : Default;
3797}
3798
3799const RegisterBankInfo::ValueMapping *
3800AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
3801 const MachineRegisterInfo &MRI,
3802 const TargetRegisterInfo &TRI) const {
3803
3804
3805 unsigned Bank = getRegBankID(Reg, MRI, AMDGPU::SGPRRegBankID);
3806 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3807 return AMDGPU::getValueMapping(Bank, Size);
3808}
3809
3810const RegisterBankInfo::ValueMapping *
3811AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
3812 const MachineRegisterInfo &MRI,
3813 const TargetRegisterInfo &TRI) const {
3814 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3815 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
3816}
3817
3818const RegisterBankInfo::ValueMapping *
3819AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
3820 const MachineRegisterInfo &MRI,
3821 const TargetRegisterInfo &TRI) const {
3822 unsigned Size = getSizeInBits(Reg, MRI, TRI);
3823 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
3824}
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3839
3840 if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {
3841 Register DstReg = MI.getOperand(0).getReg();
3842 Register SrcReg = MI.getOperand(1).getReg();
3843
3844
3845
3846 const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
3847 const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
3848
3849
3850
3851
3852 unsigned Size;
3853 if (!SrcReg.isVirtual() && !DstBank &&
3855 DstBank = &AMDGPU::VCCRegBank;
3858 DstBank = &AMDGPU::VCCRegBank;
3860 } else {
3862 }
3863
3864 if (!DstBank)
3865 DstBank = SrcBank;
3866 else if (!SrcBank)
3867 SrcBank = DstBank;
3868
3869 if (MI.getOpcode() != AMDGPU::G_FREEZE &&
3872
3874 unsigned OpdsMappingSize = MI.isCopy() ? 1 : 2;
3876 OpdsMapping[0] = &ValMap;
3877 if (MI.getOpcode() == AMDGPU::G_FREEZE)
3878 OpdsMapping[1] = &ValMap;
3879
3881 1, 1,
3882 getOperandsMapping(OpdsMapping), OpdsMappingSize);
3883 }
3884
3885 if (MI.isRegSequence()) {
3886
3887
3888 unsigned BankID = AMDGPU::SGPRRegBankID;
3889
3890 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3892
3893
3894 if (OpBank != AMDGPU::SGPRRegBankID) {
3895 BankID = AMDGPU::VGPRRegBankID;
3896 break;
3897 }
3898 }
3900
3903 1, 1,
3905 }
3906
3907
3908
3909
3910
3911 if (const auto *PHI = dyn_cast<GPhi>(&MI)) {
3912 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3913 Register DstReg = PHI->getReg(0);
3914
3915
3916 if (const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI))
3917 ResultBank = DstBank->getID();
3918
3919 for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) {
3920 Register Reg = PHI->getIncomingValue(I);
3921 const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
3922
3923
3924 if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
3925 ResultBank = AMDGPU::VGPRRegBankID;
3926 break;
3927 }
3928
3929
3930 unsigned OpBank = Bank->getID();
3931 ResultBank = regBankBoolUnion(ResultBank, OpBank);
3932 }
3933
3934 assert(ResultBank != AMDGPU::InvalidRegBankID);
3935
3936 unsigned Size = MRI.getType(DstReg).getSizeInBits();
3937
3941 1, 1,
3943 }
3944
3945 const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
3946 if (Mapping.isValid())
3947 return Mapping;
3948
3949 SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
3950
3951 switch (MI.getOpcode()) {
3952 default:
3953 return getInvalidInstructionMapping();
3954
3955 case AMDGPU::G_AND:
3956 case AMDGPU::G_OR:
3957 case AMDGPU::G_XOR:
3958 case AMDGPU::G_MUL: {
3959 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3960 if (Size == 1) {
3961 const RegisterBank *DstBank =
3962 getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
3963
3964 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3965 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3966 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3967 if (DstBank) {
3968 TargetBankID = DstBank->getID();
3969 if (DstBank == &AMDGPU::VCCRegBank) {
3970 TargetBankID = AMDGPU::VCCRegBankID;
3971 BankLHS = AMDGPU::VCCRegBankID;
3972 BankRHS = AMDGPU::VCCRegBankID;
3973 } else {
3974 BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
3975 AMDGPU::SGPRRegBankID);
3976 BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
3977 AMDGPU::SGPRRegBankID);
3978 }
3979 } else {
3980 BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
3981 AMDGPU::VCCRegBankID);
3982 BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
3983 AMDGPU::VCCRegBankID);
3984
3985
3986 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3987 TargetBankID = AMDGPU::VGPRRegBankID;
3988 } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3989 TargetBankID = AMDGPU::VCCRegBankID;
3990 BankLHS = AMDGPU::VCCRegBankID;
3991 BankRHS = AMDGPU::VCCRegBankID;
3992 } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3993 TargetBankID = AMDGPU::SGPRRegBankID;
3994 }
3995 }
3996
3997 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
3998 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
3999 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
4000 break;
4001 }
4002
4003 if (Size == 64) {
4004
4005 if (isSALUMapping(MI)) {
4006 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
4007 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
4008 } else {
4009 if (MI.getOpcode() == AMDGPU::G_MUL && Subtarget.hasVectorMulU64())
4010 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4011 else
4012 OpdsMapping[0] =
4013 getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
4014 unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI);
4015 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
4016
4017 unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI);
4018 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
4019 }
4020
4021 break;
4022 }
4023
4024 [[fallthrough]];
4025 }
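 // Resolution rule for the s1 case above: any VGPR input forces a VCC
 // result; otherwise any VCC input keeps the whole operation in VCC; only
 // all-SGPR inputs leave it as a scalar (SCC-style) boolean.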
4026 case AMDGPU::G_PTR_ADD:
4027 case AMDGPU::G_PTRMASK:
4028 case AMDGPU::G_ADD:
4029 case AMDGPU::G_SUB:
4030 case AMDGPU::G_SHL:
4031 case AMDGPU::G_LSHR:
4032 case AMDGPU::G_ASHR:
4033 case AMDGPU::G_UADDO:
4034 case AMDGPU::G_USUBO:
4035 case AMDGPU::G_UADDE:
4036 case AMDGPU::G_SADDE:
4037 case AMDGPU::G_USUBE:
4038 case AMDGPU::G_SSUBE:
4039 case AMDGPU::G_ABS:
4040 case AMDGPU::G_SHUFFLE_VECTOR:
4041 case AMDGPU::G_SBFX:
4042 case AMDGPU::G_UBFX:
4043 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
4044 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
4048 case AMDGPU::G_SMIN:
4049 case AMDGPU::G_SMAX:
4050 case AMDGPU::G_UMIN:
4051 case AMDGPU::G_UMAX:
4053
4054 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 64 &&
4058 }
4060 case AMDGPU::G_FADD:
4061 case AMDGPU::G_FSUB:
4062 case AMDGPU::G_FMUL:
4063 case AMDGPU::G_FMA:
4064 case AMDGPU::G_FFLOOR:
4065 case AMDGPU::G_FCEIL:
4066 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
4067 case AMDGPU::G_FMINNUM:
4068 case AMDGPU::G_FMAXNUM:
4069 case AMDGPU::G_FMINIMUM:
4070 case AMDGPU::G_FMAXIMUM:
4071 case AMDGPU::G_FMINIMUMNUM:
4072 case AMDGPU::G_FMAXIMUMNUM:
4073 case AMDGPU::G_INTRINSIC_TRUNC:
4074 case AMDGPU::G_STRICT_FADD:
4075 case AMDGPU::G_STRICT_FSUB:
4076 case AMDGPU::G_STRICT_FMUL:
4077 case AMDGPU::G_STRICT_FMA: {
4078 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4079 unsigned Size = Ty.getSizeInBits();
4080 if (Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&
4081 (Size == 16 || Size == 32) && isSALUMapping(MI))
4082 return getDefaultMappingSOP(MI);
4083 return getDefaultMappingVOP(MI);
4084 }
4085 case AMDGPU::G_FPTOSI:
4086 case AMDGPU::G_FPTOUI:
4087 case AMDGPU::G_SITOFP:
4088 case AMDGPU::G_UITOFP: {
4089 unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4090 unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4091 if (Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
4092 isSALUMapping(MI))
4093 return getDefaultMappingSOP(MI);
4094 return getDefaultMappingVOP(MI);
4095 }
4096 case AMDGPU::G_FPTRUNC:
4097 case AMDGPU::G_FPEXT: {
4098 unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4099 unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4100 if (Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
4101 isSALUMapping(MI))
4102 return getDefaultMappingSOP(MI);
4103 return getDefaultMappingVOP(MI);
4104 }
4105 case AMDGPU::G_FSQRT:
4106 case AMDGPU::G_FEXP2:
4107 case AMDGPU::G_FLOG2: {
4108 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4109 if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
4110 isSALUMapping(MI))
4111 return getDefaultMappingSOP(MI);
4112 return getDefaultMappingVOP(MI);
4113 }
4114 case AMDGPU::G_SADDSAT:
4115 case AMDGPU::G_SSUBSAT:
4116 case AMDGPU::G_UADDSAT:
4117 case AMDGPU::G_USUBSAT:
4118 case AMDGPU::G_FMAD:
4119 case AMDGPU::G_FLDEXP:
4120 case AMDGPU::G_FMINNUM_IEEE:
4121 case AMDGPU::G_FMAXNUM_IEEE:
4122 case AMDGPU::G_FCANONICALIZE:
4123 case AMDGPU::G_STRICT_FLDEXP:
4124 case AMDGPU::G_BSWAP:
4125 case AMDGPU::G_FSHR:
4126 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
4127 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
4128 case AMDGPU::G_AMDGPU_RCP_IFLAG:
4129 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4130 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4131 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4132 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4133 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4134 case AMDGPU::G_AMDGPU_SMED3:
4135 case AMDGPU::G_AMDGPU_FMED3:
4137 case AMDGPU::G_UMULH:
4138 case AMDGPU::G_SMULH: {
4139 if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
4140 return getDefaultMappingSOP(MI);
4141 return getDefaultMappingVOP(MI);
4142 }
4143 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4144 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4145
4146
4147
4148
4149
4150
4151
4152
4153 bool AllSalu = true;
4154 bool MulSalu = true;
4155 for (unsigned i = 0; i < 5; ++i) {
4156 Register Reg = MI.getOperand(i).getReg();
4157 if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
4158 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4159 AllSalu = false;
4160 if (i == 2 || i == 3) {
4161 MulSalu = false;
4162 break;
4163 }
4164 }
4165 }
4166 }
4167
4168 if (AllSalu)
4169 return getDefaultMappingSOP(MI);
4170
4171
4172
4173
4174 if (!MulSalu || Subtarget.hasFullRate64Ops())
4175 return getDefaultMappingVOP(MI);
4176
4177
4178 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4179 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4180 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4181 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4182 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4183 break;
4184 }
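 // The three-way choice above: all-SGPR operands use the scalar mapping;
 // if the multiply inputs are divergent (or full-rate 64-bit ops exist),
 // the plain VALU mapping suffices; otherwise the custom mapping keeps the
 // multiply inputs (operands 2 and 3) in SGPRs so the multiply can stay
 // scalar, while the result, carry-out, and 64-bit addend go to the VALU.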
4185 case AMDGPU::G_IMPLICIT_DEF: {
4186 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4187 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4188 break;
4189 }
4190 case AMDGPU::G_FCONSTANT:
4191 case AMDGPU::G_CONSTANT:
4192 case AMDGPU::G_GLOBAL_VALUE:
4193 case AMDGPU::G_FRAME_INDEX:
4194 case AMDGPU::G_BLOCK_ADDR:
4195 case AMDGPU::G_READSTEADYCOUNTER:
4196 case AMDGPU::G_READCYCLECOUNTER: {
4197 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4198 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4199 break;
4200 }
4201 case AMDGPU::G_DYN_STACKALLOC: {
4202
4203 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4204 unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4205 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4206 break;
4207 }
4208 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4209
4210
4211
4212
4213 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4214 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4215 break;
4216 }
4217 case AMDGPU::G_INSERT: {
4218 unsigned BankID = getMappingType(MRI, MI);
4219 unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
4220 unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
4221 unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
4222 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4223 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4224 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4225 OpdsMapping[3] = nullptr;
4226 break;
4227 }
4228 case AMDGPU::G_EXTRACT: {
4229 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4230 unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
4231 unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
4232 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4233 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4234 OpdsMapping[2] = nullptr;
4235 break;
4236 }
4237 case AMDGPU::G_BUILD_VECTOR:
4238 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4239 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4240 if (DstTy == LLT::fixed_vector(2, 16)) {
4241 unsigned DstSize = DstTy.getSizeInBits();
4242 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4243 unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4244 unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
4245 unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
4246
4247 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4248 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4249 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4250 break;
4251 }
4252
4253 [[fallthrough]];
4254 }
4255 case AMDGPU::G_MERGE_VALUES:
4256 case AMDGPU::G_CONCAT_VECTORS: {
4257 unsigned Bank = getMappingType(MRI, MI);
4258 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4259 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4260
4261 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4262
4263 for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
4264 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4265 break;
4266 }
4267 case AMDGPU::G_BITREVERSE:
4268 case AMDGPU::G_BITCAST:
4269 case AMDGPU::G_INTTOPTR:
4270 case AMDGPU::G_PTRTOINT:
4271 case AMDGPU::G_FABS:
4272 case AMDGPU::G_FNEG: {
4273 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4274 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4275 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
4276 break;
4277 }
4278 case AMDGPU::G_AMDGPU_FFBH_U32:
4279 case AMDGPU::G_AMDGPU_FFBL_B32:
4280 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4281 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4282 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4283 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4284 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4285 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
4286 break;
4287 }
4288 case AMDGPU::G_CTPOP: {
4289 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4290 unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4291 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4292
4293
4294
4295
4296 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
4297 break;
4298 }
4299 case AMDGPU::G_TRUNC: {
4300 Register Dst = MI.getOperand(0).getReg();
4301 Register Src = MI.getOperand(1).getReg();
4302 unsigned Bank = getRegBankID(Src, MRI);
4303 unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
4304 unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
4305 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4306 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4307 break;
4308 }
4309 case AMDGPU::G_ZEXT:
4310 case AMDGPU::G_SEXT:
4311 case AMDGPU::G_ANYEXT:
4312 case AMDGPU::G_SEXT_INREG: {
4313 Register Dst = MI.getOperand(0).getReg();
4314 Register Src = MI.getOperand(1).getReg();
4315 unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
4316 unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
4317
4318 unsigned DstBank;
4319 const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
4320
4321 switch (SrcBank->getID()) {
4322 case AMDGPU::SGPRRegBankID:
4323 DstBank = AMDGPU::SGPRRegBankID;
4324 break;
4325 default:
4326 DstBank = AMDGPU::VGPRRegBankID;
4327 break;
4328 }
4329
4330
4331
4332 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4333 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
4334 SrcSize);
4335 break;
4336 }
4337 case AMDGPU::G_IS_FPCLASS: {
4338 Register SrcReg = MI.getOperand(1).getReg();
4339 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
4340 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4341 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4342 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4343 break;
4344 }
4345 case AMDGPU::G_STORE: {
4346 assert(MI.getOperand(0).isReg());
4347 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4348
4349
4350
4351 const ValueMapping *ValMapping =
4352 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4353 OpdsMapping[0] = ValMapping;
4354 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
4355 break;
4356 }
4357 case AMDGPU::G_ICMP:
4358 case AMDGPU::G_FCMP: {
4359 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4360
4361
4362
4363 unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI,
4364 AMDGPU::SGPRRegBankID);
4365 unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
4366 unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);
4367
4368 auto canUseSCCICMP = [&]() {
4369 auto Pred =
4370 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4371 return Size == 32 ||
4372 (Size == 64 &&
4373 (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
4374 Subtarget.hasScalarCompareEq64());
4375 };
4376 auto canUseSCCFCMP = [&]() {
4377 return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);
4378 };
4379
4380 bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
4381 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4382 Op2Bank == AMDGPU::SGPRRegBankID &&
4383 Op3Bank == AMDGPU::SGPRRegBankID &&
4384 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4385
4386 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4387 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4388
4389
4390
4391 const unsigned ResultSize = 1;
4392
4393 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4394 OpdsMapping[1] = nullptr;
4395 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
4396 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
4397 break;
4398 }
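 // Net effect of the compare mapping above: a compare stays on the SALU
 // (result in SCC, modeled as a 1-bit SGPR value here) only when both
 // sources are SGPRs and the size/predicate combination is directly
 // supported; every other combination produces a VCC lane mask with VGPR
 // sources.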
4399 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4400
4401 unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
4402 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4403 unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4404 unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4405 unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI);
4406 unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);
4407
4408 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4409 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4410
4411
4412 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4413 break;
4414 }
4415 case AMDGPU::G_INSERT_VECTOR_ELT: {
4416 unsigned OutputBankID = isSALUMapping(MI) ?
4417 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4418
4419 unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4420 unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4421 unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
4422 unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
4423 unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI);
4424
4425 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4426 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4427
4428
4429
4430 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4431 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4432 InsertSize);
4433 } else {
4434 assert(InsertSize == 32 || InsertSize == 64);
4435 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4436 }
4437
4438
4439 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4440 break;
4441 }
4442 case AMDGPU::G_UNMERGE_VALUES: {
4443 unsigned Bank = getMappingType(MRI, MI);
4444
4445
4446
4447 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
4448 unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
4449 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
4450 }
4451 break;
4452 }
4453 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4454 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4455 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4456 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4457 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4458 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4459 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4460 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4461 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4462 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4463 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4464 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4465 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4466 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4467 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4468 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4469 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4470 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4471 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4472 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4473 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4474 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4476
4477
4479
4480
4482
4483
4485
4486
4488
4489
4490
4491 break;
4492 }
4493 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4494 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4495 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4496 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4497 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4498 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4499 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4500 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4501 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4502 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4503 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4504 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4505 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32:
4506 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32:
4507 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4508 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4509 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4510
4512
4513
4515
4516
4518
4519
4521
4522
4524
4525
4527
4528
4529
4530 break;
4531 }
4532 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4533
4535
4536
4538
4539
4541
4542
4544
4545
4547
4548
4550
4551
4553
4554
4555
4556 break;
4557 }
4558 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4559 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4560 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4561 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4562 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4563
4564
4565 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
4566 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
4567
4568
4569
4570 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4571 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4572 unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);
4573
4574 unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4575 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4576 break;
4577 }
4578 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
4581 break;
4582 case AMDGPU::G_INTRINSIC:
4583 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4584 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
4585 default:
4586 return getInvalidInstructionMapping();
4587 case Intrinsic::amdgcn_div_fmas:
4588 case Intrinsic::amdgcn_div_fixup:
4589 case Intrinsic::amdgcn_trig_preop:
4590 case Intrinsic::amdgcn_sin:
4591 case Intrinsic::amdgcn_cos:
4592 case Intrinsic::amdgcn_log_clamp:
4593 case Intrinsic::amdgcn_rcp_legacy:
4594 case Intrinsic::amdgcn_rsq_legacy:
4595 case Intrinsic::amdgcn_rsq_clamp:
4596 case Intrinsic::amdgcn_tanh:
4597 case Intrinsic::amdgcn_fmul_legacy:
4598 case Intrinsic::amdgcn_fma_legacy:
4599 case Intrinsic::amdgcn_frexp_mant:
4600 case Intrinsic::amdgcn_frexp_exp:
4601 case Intrinsic::amdgcn_fract:
4602 case Intrinsic::amdgcn_cvt_pknorm_i16:
4603 case Intrinsic::amdgcn_cvt_pknorm_u16:
4604 case Intrinsic::amdgcn_cvt_pk_i16:
4605 case Intrinsic::amdgcn_cvt_pk_u16:
4606 case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
4607 case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
4608 case Intrinsic::amdgcn_cvt_pk_f16_fp8:
4609 case Intrinsic::amdgcn_cvt_pk_f16_bf8:
4610 case Intrinsic::amdgcn_cvt_pk_fp8_f16:
4611 case Intrinsic::amdgcn_cvt_pk_bf8_f16:
4612 case Intrinsic::amdgcn_cvt_sr_fp8_f16:
4613 case Intrinsic::amdgcn_cvt_sr_bf8_f16:
4614 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8:
4615 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp8:
4616 case Intrinsic::amdgcn_cvt_scale_pk8_f16_bf8:
4617 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_bf8:
4618 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp4:
4619 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp4:
4620 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
4621 case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
4622 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
4623 case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
4624 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
4625 case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
4626 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
4627 case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
4628 case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
4629 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
4630 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
4631 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
4632 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
4633 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
4634 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
4635 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
4636 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
4637 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
4638 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
4639 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
4640 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
4641 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
4642 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
4643 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
4644 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
4645 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
4646 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
4647 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
4648 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
4649 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
4650 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
4651 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
4652 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
4653 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
4654 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
4655 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
4656 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
4657 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
4658 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
4659 case Intrinsic::amdgcn_sat_pk4_i4_i8:
4660 case Intrinsic::amdgcn_sat_pk4_u4_u8:
4661 case Intrinsic::amdgcn_fmed3:
4662 case Intrinsic::amdgcn_cubeid:
4663 case Intrinsic::amdgcn_cubema:
4664 case Intrinsic::amdgcn_cubesc:
4665 case Intrinsic::amdgcn_cubetc:
4666 case Intrinsic::amdgcn_sffbh:
4667 case Intrinsic::amdgcn_fmad_ftz:
4668 case Intrinsic::amdgcn_mbcnt_lo:
4669 case Intrinsic::amdgcn_mbcnt_hi:
4670 case Intrinsic::amdgcn_mul_u24:
4671 case Intrinsic::amdgcn_mul_i24:
4672 case Intrinsic::amdgcn_mulhi_u24:
4673 case Intrinsic::amdgcn_mulhi_i24:
4674 case Intrinsic::amdgcn_lerp:
4675 case Intrinsic::amdgcn_sad_u8:
4676 case Intrinsic::amdgcn_msad_u8:
4677 case Intrinsic::amdgcn_sad_hi_u8:
4678 case Intrinsic::amdgcn_sad_u16:
4679 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4680 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4681 case Intrinsic::amdgcn_mqsad_u32_u8:
4682 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4683 case Intrinsic::amdgcn_alignbyte:
4684 case Intrinsic::amdgcn_perm:
4685 case Intrinsic::amdgcn_prng_b32:
4686 case Intrinsic::amdgcn_fdot2:
4687 case Intrinsic::amdgcn_sdot2:
4688 case Intrinsic::amdgcn_udot2:
4689 case Intrinsic::amdgcn_sdot4:
4690 case Intrinsic::amdgcn_udot4:
4691 case Intrinsic::amdgcn_sdot8:
4692 case Intrinsic::amdgcn_udot8:
4693 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4694 case Intrinsic::amdgcn_fdot2_f16_f16:
4695 case Intrinsic::amdgcn_fdot2_f32_bf16:
4696 case Intrinsic::amdgcn_fdot2c_f32_bf16:
4697 case Intrinsic::amdgcn_sudot4:
4698 case Intrinsic::amdgcn_sudot8:
4699 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4700 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4701 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4702 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4703 case Intrinsic::amdgcn_cvt_f32_fp8:
4704 case Intrinsic::amdgcn_cvt_f32_fp8_e5m3:
4705 case Intrinsic::amdgcn_cvt_f32_bf8:
4706 case Intrinsic::amdgcn_cvt_off_f32_i4:
4707 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4708 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4709 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4710 case Intrinsic::amdgcn_cvt_pk_fp8_f32_e5m3:
4711 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4712 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4713 case Intrinsic::amdgcn_cvt_sr_fp8_f32_e5m3:
4714 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4715 case Intrinsic::amdgcn_cvt_sr_bf16_f32:
4716 case Intrinsic::amdgcn_cvt_sr_f16_f32:
4717 case Intrinsic::amdgcn_cvt_f16_fp8:
4718 case Intrinsic::amdgcn_cvt_f16_bf8:
4719 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
4720 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
4721 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
4722 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
4723 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
4724 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
4725 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
4726 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
4727 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
4728 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
4729 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
4730 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
4731 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
4732 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
4733 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
4734 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
4735 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
4736 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
4737 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
4738 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
4739 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
4740 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
4741 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
4742 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
4743 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
4744 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
4745 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
4746 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
4747 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
4748 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
4749 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
4750 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
4751 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
4752 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
4753 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4754 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4755 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4756 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4757 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4758 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4759 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
4760 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
4761 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
4762 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
4763 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
4764 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
4765 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
4766 case Intrinsic::amdgcn_ashr_pk_i8_i32:
4767 case Intrinsic::amdgcn_ashr_pk_u8_i32:
4768 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
4769 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
4770 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4771 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4772 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4773 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4774 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4775 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4776 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4777 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4778 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4779 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4780 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4781 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4782 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4783 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4784 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4785 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4786 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4787 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4788 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4789 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4790 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4791 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4792 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4793 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4794 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4795 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4796 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4797 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4798 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4799 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
4800 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8:
4801 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8:
4802 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8:
4803 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8:
4804 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8:
4805 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8:
4806 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8:
4807 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8:
4808 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8:
4809 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8:
4810 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8:
4811 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8:
4812 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8:
4813 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8:
4814 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8:
4815 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:
4816 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
4817 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
4818 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
4819 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
4820 case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:
4821 case Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4:
4822 case Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4:
4823 case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:
4824 case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:
4825 case Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16:
4826 case Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16:
4827 case Intrinsic::amdgcn_swmmac_f32_16x16x64_f16:
4828 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8:
4829 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8:
4830 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8:
4831 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8:
4832 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8:
4833 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8:
4834 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
4835 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
4836 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
4837 case Intrinsic::amdgcn_perm_pk16_b4_u4:
4838 case Intrinsic::amdgcn_perm_pk16_b6_u4:
4839 case Intrinsic::amdgcn_perm_pk16_b8_u4:
4840 case Intrinsic::amdgcn_add_max_i32:
4841 case Intrinsic::amdgcn_add_max_u32:
4842 case Intrinsic::amdgcn_add_min_i32:
4843 case Intrinsic::amdgcn_add_min_u32:
4844 case Intrinsic::amdgcn_pk_add_max_i16:
4845 case Intrinsic::amdgcn_pk_add_max_u16:
4846 case Intrinsic::amdgcn_pk_add_min_i16:
4847 case Intrinsic::amdgcn_pk_add_min_u16:
4848 return getDefaultMappingVOP(MI);
4849 case Intrinsic::amdgcn_log:
4850 case Intrinsic::amdgcn_exp2:
4851 case Intrinsic::amdgcn_rcp:
4852 case Intrinsic::amdgcn_rsq:
4853 case Intrinsic::amdgcn_sqrt: {
4854 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4855 if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
4856 isSALUMapping(MI))
4857 return getDefaultMappingSOP(MI);
4858 return getDefaultMappingVOP(MI);
4859 }
4860 case Intrinsic::amdgcn_sbfe:
4861 case Intrinsic::amdgcn_ubfe:
4862 if (isSALUMapping(MI))
4863 return getDefaultMappingSOP(MI);
4864 return getDefaultMappingVOP(MI);
4865 case Intrinsic::amdgcn_ds_swizzle:
4866 case Intrinsic::amdgcn_ds_permute:
4867 case Intrinsic::amdgcn_ds_bpermute:
4868 case Intrinsic::amdgcn_update_dpp:
4869 case Intrinsic::amdgcn_mov_dpp8:
4870 case Intrinsic::amdgcn_mov_dpp:
4871 case Intrinsic::amdgcn_strict_wwm:
4872 case Intrinsic::amdgcn_wwm:
4873 case Intrinsic::amdgcn_strict_wqm:
4874 case Intrinsic::amdgcn_wqm:
4875 case Intrinsic::amdgcn_softwqm:
4876 case Intrinsic::amdgcn_set_inactive:
4877 case Intrinsic::amdgcn_set_inactive_chain_arg:
4878 case Intrinsic::amdgcn_permlane64:
4879 case Intrinsic::amdgcn_ds_bpermute_fi_b32:
4880 return getDefaultMappingVOP(MI);
4881 case Intrinsic::amdgcn_cvt_pkrtz:
4882 if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
4883 return getDefaultMappingSOP(MI);
4884 return getDefaultMappingVOP(MI);
4885 case Intrinsic::amdgcn_kernarg_segment_ptr:
4886 case Intrinsic::amdgcn_s_getpc:
4887 case Intrinsic::amdgcn_groupstaticsize:
4888 case Intrinsic::amdgcn_reloc_constant:
4889 case Intrinsic::returnaddress: {
4890 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4891 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4892 break;
4893 }
4894 case Intrinsic::amdgcn_wqm_vote: {
4895 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4896 OpdsMapping[0] = OpdsMapping[2]
4897 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
4898 break;
4899 }
4900 case Intrinsic::amdgcn_ps_live: {
4901 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4902 break;
4903 }
4904 case Intrinsic::amdgcn_div_scale: {
4905 unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4906 unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4907 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4908 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4909
4910 unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
4911 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4912 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4913 break;
4914 }
4915 case Intrinsic::amdgcn_class: {
4916 Register Src0Reg = MI.getOperand(2).getReg();
4917 Register Src1Reg = MI.getOperand(3).getReg();
4918 unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
4919 unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
4920 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4921 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4922 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4923 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4924 break;
4925 }
4926 case Intrinsic::amdgcn_icmp:
4927 case Intrinsic::amdgcn_fcmp: {
4928 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4929
4930 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4931 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4932 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4933 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4934 break;
4935 }
4936 case Intrinsic::amdgcn_readlane: {
4937
4938 Register IdxReg = MI.getOperand(3).getReg();
4939 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
4940 unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
4941 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4942 [[fallthrough]];
4943 }
4944 case Intrinsic::amdgcn_readfirstlane: {
4945 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4946 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
4947 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4948 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4949 break;
4950 }
4951 case Intrinsic::amdgcn_writelane: {
4952 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
4953 Register SrcReg = MI.getOperand(2).getReg();
4954 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
4955 unsigned SrcBank = getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);
4956 Register IdxReg = MI.getOperand(3).getReg();
4957 unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
4958 unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
4959 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4960
4961
4962
4963 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4964 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4965 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4966 break;
4967 }
4968 case Intrinsic::amdgcn_if_break: {
4970 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4971 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4972 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
4973 break;
4974 }
4975 case Intrinsic::amdgcn_permlane16:
4976 case Intrinsic::amdgcn_permlanex16: {
4978 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4979 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4980 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4983 break;
4984 }
4985 case Intrinsic::amdgcn_permlane_bcast:
4986 case Intrinsic::amdgcn_permlane_up:
4987 case Intrinsic::amdgcn_permlane_down:
4988 case Intrinsic::amdgcn_permlane_xor: {
4990 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4991 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4994 break;
4995 }
4996 case Intrinsic::amdgcn_permlane_idx_gen: {
4998 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
4999 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5001 break;
5002 }
5003 case Intrinsic::amdgcn_permlane16_var:
5004 case Intrinsic::amdgcn_permlanex16_var: {
5006 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5007 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5008 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5009 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5010 break;
5011 }
5012 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
5013 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
5014 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
5015 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
5016 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
5017 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
5018 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
5019 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
5020 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
5021 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
5022 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
5023 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
5024 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
5025 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
5026 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
5027 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
5028 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
5029 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
5030 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
5031 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
5032 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
5033 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
5034 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
5035 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
5036 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
5037 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
5038 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
5039 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
5040 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
5041 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
5042 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
5043 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
5044 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
5045 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
5046 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
5047 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
5048 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
5049 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
5050 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
5051 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
5052 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
5053 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
5054 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
5055 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
5056 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5057 unsigned MinNumRegsRequired = DstSize / 32;
5058
5059
5060
5061
5062
5063
5064
5066
5067 bool UseAGPRForm = !Subtarget.hasGFX90AInsts() ||
5068 Info->selectAGPRFormMFMA(MinNumRegsRequired);
5069
5070 OpdsMapping[0] =
5075 OpdsMapping[4] =
5078 break;
5079 }
5080 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
5081 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
5082 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5083 unsigned MinNumRegsRequired = DstSize / 32;
5084
5085 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
5086 bool UseAGPRForm = !Subtarget.hasGFX90AInsts() ||
5087 Info->selectAGPRFormMFMA(MinNumRegsRequired);
5088 OpdsMapping[0] =
5089 UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
5090 : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5091
5092 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5093 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5094 OpdsMapping[4] =
5095 UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
5096 : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5097
5098 OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
5099 OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI);
5100 break;
5101 }
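// Sparse MFMA (smfmac) adds a sparsity index operand after srcC; as with
// plain MFMA, only the accumulator operands may live in AGPRs, and the
// index is an ordinary VGPR operand.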
5102 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
5103 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
5104 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
5105 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
5106 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
5107 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
5108 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
5109 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
5110 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
5111 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
5112 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
5113 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
5114 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
5115 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
5116 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
5117 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
5118 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
5119 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
5120 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
5121 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
5122 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
5123 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
5124 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
5125 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
5126 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
5127 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
5128 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
5129 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
5130 Register DstReg = MI.getOperand(0).getReg();
5131 unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
5132 unsigned MinNumRegsRequired = DstSize / 32;
5133 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
5134 bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired);
5135
5136 // vdst, srcA, srcB, srcC, idx
5137 OpdsMapping[0] = UseAGPRForm ? getAGPROpMapping(DstReg, MRI, *TRI)
5138 : getVGPROpMapping(DstReg, MRI, *TRI);
5139
5140 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5141 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5142 OpdsMapping[4] =
5143 UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
5144 : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5145 OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
5146 break;
5147 }
5148 case Intrinsic::amdgcn_interp_p1:
5149 case Intrinsic::amdgcn_interp_p2:
5150 case Intrinsic::amdgcn_interp_mov:
5151 case Intrinsic::amdgcn_interp_p1_f16:
5152 case Intrinsic::amdgcn_interp_p2_f16:
5153 case Intrinsic::amdgcn_lds_param_load: {
5154 const int M0Idx = MI.getNumOperands() - 1;
5155 Register M0Reg = MI.getOperand(M0Idx).getReg();
5156 unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
5157 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5158
5159 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5160 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
5161 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5162
5163 // Must be SGPR, but we must take whatever the original bank is and fix it
5164 // later.
5165 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5166 break;
5167 }
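// The *_inreg interpolation variants take the parameter values as normal
// register operands instead of reading them through M0, so the result and
// all three inputs map straight to VGPRs.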
5168 case Intrinsic::amdgcn_interp_inreg_p10:
5169 case Intrinsic::amdgcn_interp_inreg_p2:
5170 case Intrinsic::amdgcn_interp_inreg_p10_f16:
5171 case Intrinsic::amdgcn_interp_inreg_p2_f16:
5172 case Intrinsic::amdgcn_interp_p10_rtz_f16:
5173 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
5174 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5175 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5176 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5177 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5178 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5179 break;
5180 }
5181 case Intrinsic::amdgcn_permlane16_swap:
5182 case Intrinsic::amdgcn_permlane32_swap: {
5183 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5184 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
5185 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5186 break;
5187 }
5188 case Intrinsic::amdgcn_ballot: {
5189 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5190 unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
5191 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5192 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
5193 break;
5194 }
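// inverse_ballot is the reverse of ballot: a wave-sized scalar mask comes
// in (ideally uniform, in an SGPR) and a per-lane i1 comes back in VCC.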
5195 case Intrinsic::amdgcn_inverse_ballot: {
5196 // This must be an SGPR, but accept a VGPR.
5197 Register MaskReg = MI.getOperand(2).getReg();
5198 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
5199 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5200 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5201 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5202 break;
5203 }
5204 case Intrinsic::amdgcn_bitop3: {
5205 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5206 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5207 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5208 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5209 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5210 break;
5211 }
5212 case Intrinsic::amdgcn_s_quadmask:
5213 case Intrinsic::amdgcn_s_wqm: {
5214 Register MaskReg = MI.getOperand(2).getReg();
5215 unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
5216 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5217 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
5218 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5219 break;
5220 }
5221 case Intrinsic::amdgcn_wave_reduce_add:
5222 case Intrinsic::amdgcn_wave_reduce_fadd:
5223 case Intrinsic::amdgcn_wave_reduce_sub:
5224 case Intrinsic::amdgcn_wave_reduce_fsub:
5225 case Intrinsic::amdgcn_wave_reduce_min:
5226 case Intrinsic::amdgcn_wave_reduce_umin:
5227 case Intrinsic::amdgcn_wave_reduce_fmin:
5228 case Intrinsic::amdgcn_wave_reduce_max:
5229 case Intrinsic::amdgcn_wave_reduce_umax:
5230 case Intrinsic::amdgcn_wave_reduce_fmax:
5231 case Intrinsic::amdgcn_wave_reduce_and:
5232 case Intrinsic::amdgcn_wave_reduce_or:
5233 case Intrinsic::amdgcn_wave_reduce_xor: {
5234 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5235 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5236 unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
5237 auto regBankID =
5238 isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5239 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
5240 break;
5241 }
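// s_bitreplicate doubles each bit of a 32-bit scalar mask into a 64-bit
// result; it is a pure SALU operation, so both operands map to SGPRs.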
5242 case Intrinsic::amdgcn_s_bitreplicate:
5243 Register MaskReg = MI.getOperand(2).getReg();
5244 unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
5245 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5246 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
5247 }
5248 break;
5249 }
5250 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
5251 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
5252 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
5253 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
5254 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
5255 const AMDGPU::RsrcIntrinsic *RSrcIntrin =
5256 AMDGPU::lookupRsrcIntrinsic(AMDGPU::getIntrinsicID(MI));
5257 assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
5258 // Non-images can have complications from operands that allow both SGPR
5259 // and VGPR. For now it's too complicated to figure out the final opcode
5260 // to derive the register bank from the MCInstrDesc.
5261 assert(RSrcIntrin->IsImage);
5262 return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
5263 }
5264 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5265 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
5266 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5267 bool IsDualOrBVH8 =
5268 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5269 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5270 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
5271 unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods;
5272 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5273 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5274 if (IsDualOrBVH8) {
5275 OpdsMapping[1] = AMDGPU::getValueMapping(
5276 AMDGPU::VGPRRegBankID,
5277 MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
5278 OpdsMapping[2] = AMDGPU::getValueMapping(
5279 AMDGPU::VGPRRegBankID,
5280 MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
5281 }
5282 OpdsMapping[LastRegOpIdx] =
5283 getSGPROpMapping(MI.getOperand(LastRegOpIdx).getReg(), MRI, *TRI);
5284 if (LastRegOpIdx == 3) {
5285 // Sequential form: all operands combined into VGPR256/VGPR512
5286 unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
5287 if (Size > 256)
5288 Size = 512;
5289 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5290 } else {
5291 // NSA form
5292 unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
5293 for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) {
5294 unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
5295 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
5296 }
5297 }
5298 break;
5299 }
5300 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
5301 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
5302 auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
5303 switch (IntrID) {
5304 case Intrinsic::amdgcn_s_getreg:
5305 case Intrinsic::amdgcn_s_memtime:
5306 case Intrinsic::amdgcn_s_memrealtime:
5307 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
5308 case Intrinsic::amdgcn_s_sendmsg_rtn: {
5309 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5310 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5311 break;
5312 }
5313 case Intrinsic::amdgcn_global_atomic_fmin_num:
5314 case Intrinsic::amdgcn_global_atomic_fmax_num:
5315 case Intrinsic::amdgcn_flat_atomic_fmin_num:
5316 case Intrinsic::amdgcn_flat_atomic_fmax_num:
5317 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
5318 case Intrinsic::amdgcn_global_load_tr_b64:
5319 case Intrinsic::amdgcn_global_load_tr_b128:
5320 case Intrinsic::amdgcn_global_load_tr4_b64:
5321 case Intrinsic::amdgcn_global_load_tr6_b96:
5322 case Intrinsic::amdgcn_ds_load_tr8_b64:
5323 case Intrinsic::amdgcn_ds_load_tr16_b128:
5324 case Intrinsic::amdgcn_ds_load_tr4_b64:
5325 case Intrinsic::amdgcn_ds_load_tr6_b96:
5326 case Intrinsic::amdgcn_flat_load_monitor_b32:
5327 case Intrinsic::amdgcn_flat_load_monitor_b64:
5328 case Intrinsic::amdgcn_flat_load_monitor_b128:
5329 case Intrinsic::amdgcn_global_load_monitor_b32:
5330 case Intrinsic::amdgcn_global_load_monitor_b64:
5331 case Intrinsic::amdgcn_global_load_monitor_b128:
5332 case Intrinsic::amdgcn_ds_read_tr4_b64:
5333 case Intrinsic::amdgcn_ds_read_tr6_b96:
5334 case Intrinsic::amdgcn_ds_read_tr8_b64:
5335 case Intrinsic::amdgcn_ds_read_tr16_b64:
5336 case Intrinsic::amdgcn_ds_atomic_async_barrier_arrive_b64:
5337 case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
5338 return getDefaultMappingAllVGPR(MI);
5339 case Intrinsic::amdgcn_ds_ordered_add:
5340 case Intrinsic::amdgcn_ds_ordered_swap: {
5341 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5342 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5343 unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
5344 AMDGPU::SGPRRegBankID);
5345 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
5346 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5347 break;
5348 }
5349 case Intrinsic::amdgcn_ds_append:
5350 case Intrinsic::amdgcn_ds_consume: {
5351 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5352 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5353 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5354 break;
5355 }
5356 case Intrinsic::amdgcn_exp_compr:
5357 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5358 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5359 break;
5360 case Intrinsic::amdgcn_exp:
5361
5362 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5363 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5364 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5365 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5366 break;
5367 case Intrinsic::amdgcn_exp_row:
5368 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5369 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5370 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5371 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5372 OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
5373 break;
5374 case Intrinsic::amdgcn_s_sendmsg:
5375 case Intrinsic::amdgcn_s_sendmsghalt: {
5376 // This must be an SGPR, but accept a VGPR.
5377 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
5378 AMDGPU::SGPRRegBankID);
5379 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5380 break;
5381 }
5382 case Intrinsic::amdgcn_s_setreg: {
5383 // This must be an SGPR, but accept a VGPR.
5384 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
5385 AMDGPU::SGPRRegBankID);
5386 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5387 break;
5388 }
5389 case Intrinsic::amdgcn_s_ttracedata: {
5390 // This must be an SGPR, but accept a VGPR.
5391 unsigned Bank =
5392 getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
5393 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5394 break;
5395 }
5396 case Intrinsic::amdgcn_end_cf: {
5397 unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
5398 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5399 break;
5400 }
5401 case Intrinsic::amdgcn_else: {
5402 unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
5403 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5404 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5405 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5406 break;
5407 }
5408 case Intrinsic::amdgcn_init_whole_wave:
5409 case Intrinsic::amdgcn_live_mask: {
5410 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5411 break;
5412 }
5413 case Intrinsic::amdgcn_wqm_demote:
5414 case Intrinsic::amdgcn_kill: {
5415 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5416 break;
5417 }
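// For the MUBUF/MTBUF operations below, the resource descriptor and the
// scalar offset must end up in SGPRs, while the data and per-lane offsets
// are VGPRs; a divergent descriptor is legalized later with a waterfall
// loop in applyMapping.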
5418 case Intrinsic::amdgcn_raw_buffer_load:
5419 case Intrinsic::amdgcn_raw_ptr_buffer_load:
5420 case Intrinsic::amdgcn_raw_atomic_buffer_load:
5421 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
5422 case Intrinsic::amdgcn_raw_tbuffer_load:
5423 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5424 // FIXME: Should make intrinsic ID the last operand of the instruction,
5425 // then this would be the same as store
5426 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5427 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5428 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5429 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5430 break;
5431 }
5432 case Intrinsic::amdgcn_raw_buffer_load_lds:
5433 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5434 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5435 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5436 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5437 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
5438 break;
5439 }
5440 case Intrinsic::amdgcn_raw_buffer_store:
5441 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5442 case Intrinsic::amdgcn_raw_buffer_store_format:
5443 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5444 case Intrinsic::amdgcn_raw_tbuffer_store:
5445 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5446 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5447 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5448 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5449 OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5450 break;
5451 }
5452 case Intrinsic::amdgcn_struct_buffer_load:
5453 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5454 case Intrinsic::amdgcn_struct_tbuffer_load:
5455 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5456 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5457 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5458 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5459 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5460 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5461 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5462 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
5463 break;
5464 }
5465 case Intrinsic::amdgcn_struct_buffer_load_lds:
5466 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5467 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5468 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5469 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5470 OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
5471 OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI);
5472 break;
5473 }
5474 case Intrinsic::amdgcn_struct_buffer_store:
5475 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5476 case Intrinsic::amdgcn_struct_tbuffer_store:
5477 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5478 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5479 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5480 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5481 OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
5482 OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
5483 break;
5484 }
5485 case Intrinsic::amdgcn_init_exec_from_input: {
5486 unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
5487 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
5488 break;
5489 }
5490 case Intrinsic::amdgcn_ds_gws_init:
5491 case Intrinsic::amdgcn_ds_gws_barrier:
5492 case Intrinsic::amdgcn_ds_gws_sema_br: {
5493 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5494
5495 // This must be an SGPR, but accept a VGPR.
5496 unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
5497 AMDGPU::SGPRRegBankID);
5498 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5499 break;
5500 }
5501 case Intrinsic::amdgcn_ds_gws_sema_v:
5502 case Intrinsic::amdgcn_ds_gws_sema_p:
5503 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5504 // This must be an SGPR, but accept a VGPR.
5505 unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI,
5506 AMDGPU::SGPRRegBankID);
5507 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5508 break;
5509 }
5510 case Intrinsic::amdgcn_cluster_load_b32:
5511 case Intrinsic::amdgcn_cluster_load_b64:
5512 case Intrinsic::amdgcn_cluster_load_b128: {
5515 unsigned M0Bank =
5516 getRegBankID(MI.getOperand(4).getReg(), MRI, AMDGPU::SGPRRegBankID);
5517 OpdsMapping[4] = AMDGPU::getValueMapping(M0Bank, 32);
5518 break;
5519 }
5520 case Intrinsic::amdgcn_cluster_load_async_to_lds_b8:
5521 case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
5522 case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
5523 case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
5526 unsigned M0Bank =
5527 getRegBankID(MI.getOperand(5).getReg(), MRI, AMDGPU::SGPRRegBankID);
5528 OpdsMapping[5] = AMDGPU::getValueMapping(M0Bank, 32);
5529 break;
5530 }
5531 case Intrinsic::amdgcn_global_store_async_from_lds_b8:
5532 case Intrinsic::amdgcn_global_store_async_from_lds_b32:
5533 case Intrinsic::amdgcn_global_store_async_from_lds_b64:
5534 case Intrinsic::amdgcn_global_store_async_from_lds_b128:
5535 case Intrinsic::amdgcn_global_load_async_to_lds_b8:
5536 case Intrinsic::amdgcn_global_load_async_to_lds_b32:
5537 case Intrinsic::amdgcn_global_load_async_to_lds_b64:
5538 case Intrinsic::amdgcn_global_load_async_to_lds_b128:
5539 case Intrinsic::amdgcn_load_to_lds:
5540 case Intrinsic::amdgcn_global_load_lds: {
5541 OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5542 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5543 break;
5544 }
5545 case Intrinsic::amdgcn_lds_direct_load: {
5546 const int M0Idx = MI.getNumOperands() - 1;
5547 Register M0Reg = MI.getOperand(M0Idx).getReg();
5548 unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
5549 unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5550
5551 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5552 for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
5553 OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5554
5555 // Must be SGPR, but we must take whatever the original bank is and fix it
5556 // later.
5557 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5558 break;
5559 }
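// ds_{add,sub}_gs_reg_rtn return the previous GS register value; both the
// result and the data operand are mapped to VGPRs.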
5560 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5561 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5562 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5563 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5564 break;
5565 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
5566 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
5567 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
5568 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
5569 OpdsMapping[0] =
5570 getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); // %vdst
5571 OpdsMapping[1] =
5572 getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); // %addr
5573 OpdsMapping[3] =
5574 getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); // %addr
5575 OpdsMapping[4] =
5576 getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); // %data0
5577 OpdsMapping[5] =
5578 getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
5579 break;
5580 }
5581 case Intrinsic::amdgcn_s_sleep_var:
5582 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5583 break;
5584 case Intrinsic::amdgcn_s_barrier_join:
5585 case Intrinsic::amdgcn_s_wakeup_barrier:
5586 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5587 break;
5588 case Intrinsic::amdgcn_s_barrier_init:
5589 case Intrinsic::amdgcn_s_barrier_signal_var:
5590 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5591 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5592 break;
5593 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
5594 const unsigned ResultSize = 1;
5595 OpdsMapping[0] =
5596 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5597 break;
5598 }
5599 case Intrinsic::amdgcn_s_get_barrier_state:
5600 case Intrinsic::amdgcn_s_get_named_barrier_state: {
5601 OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5602 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5603 break;
5604 }
5605 case Intrinsic::amdgcn_pops_exiting_wave_id:
5606 return getDefaultMappingSOP(MI);
5607 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
5608 case Intrinsic::amdgcn_tensor_store_from_lds_d2:
5609 case Intrinsic::amdgcn_tensor_load_to_lds:
5610 case Intrinsic::amdgcn_tensor_store_from_lds: {
5611 // Lie and claim everything is legal, even though some need to be
5612 // SGPRs. applyMapping will have to deal with it with readfirstlane.
5613 for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
5614 if (MI.getOperand(I).isReg()) {
5615 Register Reg = MI.getOperand(I).getReg();
5616 auto OpBank = getRegBankID(Reg, MRI, AMDGPU::SGPRRegBankID);
5617 unsigned Size = getSizeInBits(Reg, MRI, *TRI);
5618 OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
5619 }
5620 }
5621 break;
5622 }
5623 case Intrinsic::amdgcn_s_prefetch_data: {
5624 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5625 OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5626 break;
5627 }
5628 case Intrinsic::amdgcn_flat_prefetch:
5629 case Intrinsic::amdgcn_global_prefetch:
5630 return getDefaultMappingVOP(MI);
5631 default:
5632 return getInvalidInstructionMapping();
5633 }
5634 break;
5635 }
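// G_SELECT: the condition lives in VCC unless both value inputs and the
// condition are uniform, in which case the whole select can stay on the
// SALU with an SGPR condition.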
5636 case AMDGPU::G_SELECT: {
5637 unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
5638 unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI,
5639 AMDGPU::SGPRRegBankID);
5640 unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI,
5641 AMDGPU::SGPRRegBankID);
5642 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5643 Op3Bank == AMDGPU::SGPRRegBankID;
5644
5645 unsigned CondBankDefault = SGPRSrcs ?
5646 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5647 unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI,
5648 CondBankDefault);
5649 if (CondBank == AMDGPU::SGPRRegBankID)
5650 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5651 else if (CondBank == AMDGPU::VGPRRegBankID)
5652 CondBank = AMDGPU::VCCRegBankID;
5653
5654 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5655 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5656
5657 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5658
5659
5660 if (Size == 64) {
5661 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5662 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5663 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5664 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
5665 } else {
5666 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
5667 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5668 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
5669 OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
5670 }
5671
5672 break;
5673 }
5674
5675 case AMDGPU::G_SI_CALL: {
5676 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5677 // Lie and claim everything is legal, even though some need to be
5678 // SGPRs. applyMapping will have to deal with it as a waterfall loop.
5679 OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
5680
5681 // Allow anything for implicit arguments
5682 for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
5683 if (MI.getOperand(I).isReg()) {
5684 Register Reg = MI.getOperand(I).getReg();
5685 auto OpBank = getRegBankID(Reg, MRI);
5686 unsigned Size = getSizeInBits(Reg, MRI, *TRI);
5687 OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
5688 }
5689 }
5690 break;
5691 }
5692 case AMDGPU::G_LOAD:
5693 case AMDGPU::G_ZEXTLOAD:
5694 case AMDGPU::G_SEXTLOAD:
5695 return getInstrMappingForLoad(MI);
5696
5697 case AMDGPU::G_ATOMICRMW_XCHG:
5698 case AMDGPU::G_ATOMICRMW_ADD:
5699 case AMDGPU::G_ATOMICRMW_SUB:
5700 case AMDGPU::G_ATOMICRMW_AND:
5701 case AMDGPU::G_ATOMICRMW_OR:
5702 case AMDGPU::G_ATOMICRMW_XOR:
5703 case AMDGPU::G_ATOMICRMW_MAX:
5704 case AMDGPU::G_ATOMICRMW_MIN:
5705 case AMDGPU::G_ATOMICRMW_UMAX:
5706 case AMDGPU::G_ATOMICRMW_UMIN:
5707 case AMDGPU::G_ATOMICRMW_FADD:
5708 case AMDGPU::G_ATOMICRMW_FMIN:
5709 case AMDGPU::G_ATOMICRMW_FMAX:
5710 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5711 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5712 case AMDGPU::G_ATOMICRMW_USUB_COND:
5713 case AMDGPU::G_ATOMICRMW_USUB_SAT:
5714 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5715 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5716 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
5717 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5718 break;
5719 }
5720 case AMDGPU::G_ATOMIC_CMPXCHG: {
5721 OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5722 OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
5723 OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
5724 OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
5725 break;
5726 }
5727 case AMDGPU::G_BRCOND: {
5728 unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI,
5729 AMDGPU::SGPRRegBankID);
5730 assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
5731 if (Bank != AMDGPU::SGPRRegBankID)
5732 Bank = AMDGPU::VCCRegBankID;
5733
5734 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5735 break;
5736 }
5737 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
5738 return getDefaultMappingVOP(MI);
5739 case AMDGPU::G_PREFETCH:
5740 OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
5741 break;
5742 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP:
5743 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN:
5744 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5745 break;
5746 }
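// Any case that did not return early falls through to here and builds the
// final instruction mapping from the OpdsMapping array populated above.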
5747
5748 return getInstructionMapping(
5749 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping),
5750 MI.getNumOperands());
5751 }