LLVM: lib/Target/AMDGPU/SIRegisterInfo.cpp Source File
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
37
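// Static lookup tables shared by all SIRegisterInfo instances; both are
// filled lazily, exactly once, in the constructor below.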
38std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
47
49 const Twine &ErrMsg) {
52}
53
54namespace llvm {
122 MI->getOperand(0).isKill(), Index, RS) {}
123
134
135 if (IsWave32) {
136 ExecReg = AMDGPU::EXEC_LO;
137 MovOpc = AMDGPU::S_MOV_B32;
138 NotOpc = AMDGPU::S_NOT_B32;
139 } else {
140 ExecReg = AMDGPU::EXEC;
141 MovOpc = AMDGPU::S_MOV_B64;
142 NotOpc = AMDGPU::S_NOT_B64;
143 }
144
145 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
146 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
147 SuperReg != AMDGPU::EXEC && "exec should never spill");
148 }
178 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
179 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
180 0, false);
188 } else {
189
192 }
193
195
196
198 }
205 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
206 const TargetRegisterClass &RC =
207 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
209 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
210
212
215
217 auto I =
221
222 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false);
223 } else {
224
225
226
227 if (RS->isRegUsed(AMDGPU::SCC))
228 MI->emitError(
229 "unhandled SGPR spill to memory");
230
231
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false,
234 false);
235
239 I->getOperand(2).setIsDead();
240 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, false);
241 }
242 }
256
257 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true,
258 false);
259
262
263
266 }
267 } else {
268
269 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true,
270 false);
274 I->getOperand(2).setIsDead();
275
276
278 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, true);
279 }
280
281
285 }
286 }
287
297
298 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
299 } else {
300
301
302
303 if (RS->isRegUsed(AMDGPU::SCC))
304 MI->emitError(
305 "unhandled SGPR spill to memory");
306
307
308 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
309 false);
310
313 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
316 }
317 }
318
321 MI = NewMI;
322 MBB = NewMBB;
323 }
324};
325
326}
327
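// The constructor checks the lane-mask assumptions the sub-register code
// relies on, then performs the one-time initialization of RegSplitParts and
// SubRegFromChannelTable declared above.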
330 ST.getAMDGPUDwarfFlavour(),
331 0,
334
335 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
336 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
337 (getSubRegIndexLaneMask(AMDGPU::lo16) |
338 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
339 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
340 "getNumCoveredRegs() will not work with generated subreg masks!");
341
342 RegPressureIgnoredUnits.resize(getNumRegUnits());
343 RegPressureIgnoredUnits.set(
344 static_cast<unsigned>(*regunits(MCRegister::from(AMDGPU::M0)).begin()));
345 for (auto Reg : AMDGPU::VGPR_16RegClass) {
347 RegPressureIgnoredUnits.set(
348 static_cast<unsigned>(*regunits(Reg).begin()));
349 }
350
351
352 static llvm::once_flag InitializeRegSplitPartsFlag;
353
354 static auto InitializeRegSplitPartsOnce = [this]() {
355 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
356 unsigned Size = getSubRegIdxSize(Idx);
357 if (Size & 15)
358 continue;
359 std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];
360 unsigned Pos = getSubRegIdxOffset(Idx);
361 if (Pos % Size)
362 continue;
363 Pos /= Size;
364 if (Vec.empty()) {
365 unsigned MaxNumParts = 1024 / Size;
366 Vec.resize(MaxNumParts);
367 }
368 Vec[Pos] = Idx;
369 }
370 };
371
372 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
373
374 static auto InitializeSubRegFromChannelTableOnce = [this]() {
375 for (auto &Row : SubRegFromChannelTable)
376 Row.fill(AMDGPU::NoSubRegister);
377 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
378 unsigned Width = getSubRegIdxSize(Idx) / 32;
379 unsigned Offset = getSubRegIdxOffset(Idx) / 32;
382 if (Width == 0)
383 continue;
384 unsigned TableIdx = Width - 1;
385 assert(TableIdx < SubRegFromChannelTable.size());
387 SubRegFromChannelTable[TableIdx][Offset] = Idx;
388 }
389 };
390
391 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
392 llvm::call_once(InitializeSubRegFromChannelTableFlag,
393 InitializeSubRegFromChannelTableOnce);
394}
395
400}
401
402
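// Callee-saved register list, chosen by calling convention; gfx90a-class
// subtargets select the *_GFX90AInsts_* variants of each list.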
406 switch (CC) {
410 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
411 : CSR_AMDGPU_SaveList;
414 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
415 : CSR_AMDGPU_SI_Gfx_SaveList;
417 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
418 default: {
419
420 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
421 return &NoCalleeSavedReg;
422 }
423 }
424}
425
430
433 switch (CC) {
437 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
438 : CSR_AMDGPU_RegMask;
441 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
442 : CSR_AMDGPU_SI_Gfx_RegMask;
445
446
447 return AMDGPU_AllVGPRs_RegMask;
448 default:
449 return nullptr;
450 }
451}
452
454 return CSR_AMDGPU_NoRegs_RegMask;
455}
456
458 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
459}
460
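// Map a VGPR-only or AGPR-only register class of a given width (and
// alignment) to the matching combined AV class covering both register files.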
464
465
466
467
469 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
470 return &AMDGPU::AV_32RegClass;
471 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
472 return &AMDGPU::AV_64RegClass;
473 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
474 RC == &AMDGPU::AReg_64_Align2RegClass)
475 return &AMDGPU::AV_64_Align2RegClass;
476 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
477 return &AMDGPU::AV_96RegClass;
478 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
479 RC == &AMDGPU::AReg_96_Align2RegClass)
480 return &AMDGPU::AV_96_Align2RegClass;
481 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
482 return &AMDGPU::AV_128RegClass;
483 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
484 RC == &AMDGPU::AReg_128_Align2RegClass)
485 return &AMDGPU::AV_128_Align2RegClass;
486 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
487 return &AMDGPU::AV_160RegClass;
488 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
489 RC == &AMDGPU::AReg_160_Align2RegClass)
490 return &AMDGPU::AV_160_Align2RegClass;
491 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
492 return &AMDGPU::AV_192RegClass;
493 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
494 RC == &AMDGPU::AReg_192_Align2RegClass)
495 return &AMDGPU::AV_192_Align2RegClass;
496 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
497 return &AMDGPU::AV_256RegClass;
498 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
499 RC == &AMDGPU::AReg_256_Align2RegClass)
500 return &AMDGPU::AV_256_Align2RegClass;
501 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
502 return &AMDGPU::AV_512RegClass;
503 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
504 RC == &AMDGPU::AReg_512_Align2RegClass)
505 return &AMDGPU::AV_512_Align2RegClass;
506 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
507 return &AMDGPU::AV_1024RegClass;
508 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
509 RC == &AMDGPU::AReg_1024_Align2RegClass)
510 return &AMDGPU::AV_1024_Align2RegClass;
511 }
512
514}
515
519
520
521
522
523
526 }
529}
530
536
538
540 return AMDGPU_AllVGPRs_RegMask;
541}
542
544 return AMDGPU_AllAGPRs_RegMask;
545}
546
548 return AMDGPU_AllVectorRegs_RegMask;
549}
550
552 return AMDGPU_AllAllocatableSRegs_RegMask;
553}
554
556 unsigned NumRegs) {
559 assert(NumRegIndex && "Not implemented");
560 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
561 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
562}
563
566 const unsigned Align,
569 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
570 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
571}
572
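// getReservedRegs: mark every register the allocator must never touch: EXEC,
// M0, the SRC_* pseudo registers, XNACK_MASK, LDS_DIRECT, the trap-handler
// TTMPs, plus SGPRs/VGPRs/AGPRs beyond the subtarget limits and any stack,
// frame, scratch-RSRC, or WWM registers already claimed by the function.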
588 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
589 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
590
591
592 reserveRegisterTuples(Reserved, AMDGPU::M0);
593
594
595 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
596 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
597 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
598
599
600 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
601 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
602 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
603 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
604 reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
605 reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);
606
607
608 reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);
609 reserveRegisterTuples(Reserved, AMDGPU::TENSORcnt);
610
611
612 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
613
614
615 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
616
617
618 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
619
620
621 reserveRegisterTuples(Reserved, AMDGPU::TBA);
622 reserveRegisterTuples(Reserved, AMDGPU::TMA);
623 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
624 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
625 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
626 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
627 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
628 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
629 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
630 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
631
632
633 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
634
635
636
637 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
638 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
640 if (RC->isBaseClass() && isSGPRClass(RC)) {
641 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
644 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
646 }
647 }
648 }
649
651 if (ScratchRSrcReg != AMDGPU::NoRegister) {
652
653
654
655 reserveRegisterTuples(Reserved, ScratchRSrcReg);
656 }
657
659 if (LongBranchReservedReg)
660 reserveRegisterTuples(Reserved, LongBranchReservedReg);
661
662
663
664
666 if (StackPtrReg) {
667 reserveRegisterTuples(Reserved, StackPtrReg);
668 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
669 }
670
672 if (FrameReg) {
673 reserveRegisterTuples(Reserved, FrameReg);
674 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
675 }
676
679 reserveRegisterTuples(Reserved, BasePtrReg);
680 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
681 }
682
683
684
686 if (ExecCopyReg)
687 reserveRegisterTuples(Reserved, ExecCopyReg);
688
689
690
691 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
692
694 if (RC->isBaseClass() && isVGPRClass(RC)) {
695 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
698 if (Index + NumRegs > MaxNumVGPRs)
700 }
701 }
702 }
703
704
705 if (!ST.hasMAIInsts())
706 MaxNumAGPRs = 0;
708 if (RC->isBaseClass() && isAGPRClass(RC)) {
709 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
712 if (Index + NumRegs > MaxNumAGPRs)
714 }
715 }
716 }
717
718
719
720 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
722 }
723
724
725
726
728 if (!NonWWMRegMask.empty()) {
729 for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
730 RegI < RegE; ++RegI) {
731 if (NonWWMRegMask.test(RegI))
732 reserveRegisterTuples(Reserved, RegI);
733 }
734 }
735
737 reserveRegisterTuples(Reserved, Reg);
738
739
741 reserveRegisterTuples(Reserved, Reg);
742
744 reserveRegisterTuples(Reserved, Reg);
745
747}
748
753
756
757
758
759
760
761 if (Info->isBottomOfStack())
762 return false;
763
765}
766
769 if (Info->isEntryFunction()) {
772 }
773
774
775 return true;
776}
777
780
781
782
783
784 return false;
785}
786
792
795
796 return true;
797}
798
801
802 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
803 AMDGPU::OpName::offset);
804 return MI->getOperand(OffIdx).getImm();
805}
806
808 int Idx) const {
809 switch (MI->getOpcode()) {
810 case AMDGPU::V_ADD_U32_e32:
811 case AMDGPU::V_ADD_U32_e64:
812 case AMDGPU::V_ADD_CO_U32_e32: {
813 int OtherIdx = Idx == 1 ? 2 : 1;
815 return OtherOp.isImm() ? OtherOp.getImm() : 0;
816 }
817 case AMDGPU::V_ADD_CO_U32_e64: {
818 int OtherIdx = Idx == 2 ? 3 : 2;
820 return OtherOp.isImm() ? OtherOp.getImm() : 0;
821 }
822 default:
823 break;
824 }
825
827 return 0;
828
829 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
830 AMDGPU::OpName::vaddr) ||
831 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
832 AMDGPU::OpName::saddr))) &&
833 "Should never see frame index on non-address operand");
834
836}
837
840 assert(MI.getDesc().isAdd());
843
844 if (Src0.isFI()) {
845 return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
846 Src1.getReg()));
847 }
848
849 if (Src1.isFI()) {
850 return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
851 Src0.getReg()));
852 }
853
854 return false;
855}
856
858
859 switch (MI->getOpcode()) {
860 case AMDGPU::V_ADD_U32_e32: {
861
862
863 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
865 return false;
866 [[fallthrough]];
867 }
868 case AMDGPU::V_ADD_U32_e64:
869
870
871
872
873
874
875
876
877 return !ST.enableFlatScratch();
878 case AMDGPU::V_ADD_CO_U32_e32:
879 if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
881 return false;
882
883
884 return MI->getOperand(3).isDead();
885 case AMDGPU::V_ADD_CO_U32_e64:
886
887 return MI->getOperand(1).isDead();
888 default:
889 break;
890 }
891
893 return false;
894
896
899 return !TII->isLegalMUBUFImmOffset(FullOffset);
900
903}
904
906 int FrameIdx,
907 int64_t Offset) const {
910
911 if (Ins != MBB->end())
912 DL = Ins->getDebugLoc();
913
917 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
918 : AMDGPU::V_MOV_B32_e32;
919
920 Register BaseReg = MRI.createVirtualRegister(
921 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
922 : &AMDGPU::VGPR_32RegClass);
923
927 return BaseReg;
928 }
929
930 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
931
932 Register FIReg = MRI.createVirtualRegister(
933 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
934 : &AMDGPU::VGPR_32RegClass);
935
936 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
940
941 if (ST.enableFlatScratch()) {
942
943 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
947 return BaseReg;
948 }
949
950 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
952 .addReg(FIReg)
953 .addImm(0);
954
955 return BaseReg;
956}
957
959 int64_t Offset) const {
961
962 switch (MI.getOpcode()) {
963 case AMDGPU::V_ADD_U32_e32:
964 case AMDGPU::V_ADD_CO_U32_e32: {
967 if (!FIOp->isFI())
969
970 if (!ImmOp->isImm()) {
973 TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);
974 return;
975 }
976
977 int64_t TotalOffset = ImmOp->getImm() + Offset;
978 if (TotalOffset == 0) {
979 MI.setDesc(TII->get(AMDGPU::COPY));
980 for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
981 MI.removeOperand(I);
982
983 MI.getOperand(1).ChangeToRegister(BaseReg, false);
984 return;
985 }
986
987 ImmOp->setImm(TotalOffset);
988
992
993
994
995
996
999 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1000 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), BaseRegVGPR)
1002 MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
1003 } else {
1004 MI.getOperand(2).ChangeToRegister(BaseReg, false);
1005 }
1006 return;
1007 }
1008 case AMDGPU::V_ADD_U32_e64:
1009 case AMDGPU::V_ADD_CO_U32_e64: {
1010 int Src0Idx = MI.getNumExplicitDefs();
1013 if (!FIOp->isFI())
1015
1016 if (!ImmOp->isImm()) {
1018 TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);
1019 return;
1020 }
1021
1022 int64_t TotalOffset = ImmOp->getImm() + Offset;
1023 if (TotalOffset == 0) {
1024 MI.setDesc(TII->get(AMDGPU::COPY));
1025
1026 for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
1027 MI.removeOperand(I);
1028
1029 MI.getOperand(1).ChangeToRegister(BaseReg, false);
1030 } else {
1032 ImmOp->setImm(TotalOffset);
1033 }
1034
1035 return;
1036 }
1037 default:
1038 break;
1039 }
1040
1041 bool IsFlat = TII->isFLATScratch(MI);
1042
1043#ifndef NDEBUG
1044
1045 bool SeenFI = false;
1047 if (MO.isFI()) {
1048 if (SeenFI)
1050
1051 SeenFI = true;
1052 }
1053 }
1054#endif
1055
1057 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
1058 : AMDGPU::OpName::vaddr);
1059
1062
1063 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
1065
1066 if (IsFlat) {
1069 "offset should be legal");
1071 OffsetOp->setImm(NewOffset);
1072 return;
1073 }
1074
1075#ifndef NDEBUG
1076 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
1078#endif
1079
1080 assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
1081
1083 OffsetOp->setImm(NewOffset);
1084}
1085
1088 int64_t Offset) const {
1089
1090 switch (MI->getOpcode()) {
1091 case AMDGPU::V_ADD_U32_e32:
1092 case AMDGPU::V_ADD_CO_U32_e32:
1093 return true;
1094 case AMDGPU::V_ADD_U32_e64:
1095 case AMDGPU::V_ADD_CO_U32_e64:
1097 default:
1098 break;
1099 }
1100
1102 return false;
1103
1105
1108 return TII->isLegalMUBUFImmOffset(NewOffset);
1109
1112}
1113
1116
1117
1118
1119 return &AMDGPU::VGPR_32RegClass;
1120}
1121
1124 return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
1125}
1126
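// Number of 32-bit sub-registers moved by each spill pseudo, e.g. the
// *_1024_* pseudos cover 32 dwords and the *_32_* pseudos one; block spills
// derive the count from their lane mask operand.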
1129
1130 unsigned Op = MI.getOpcode();
1131 switch (Op) {
1132 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
1133 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
1134
1135
1136
1137 return llvm::popcount(
1138 (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm());
1139 case AMDGPU::SI_SPILL_S1024_SAVE:
1140 case AMDGPU::SI_SPILL_S1024_RESTORE:
1141 case AMDGPU::SI_SPILL_V1024_SAVE:
1142 case AMDGPU::SI_SPILL_V1024_RESTORE:
1143 case AMDGPU::SI_SPILL_A1024_SAVE:
1144 case AMDGPU::SI_SPILL_A1024_RESTORE:
1145 case AMDGPU::SI_SPILL_AV1024_SAVE:
1146 case AMDGPU::SI_SPILL_AV1024_RESTORE:
1147 return 32;
1148 case AMDGPU::SI_SPILL_S512_SAVE:
1149 case AMDGPU::SI_SPILL_S512_RESTORE:
1150 case AMDGPU::SI_SPILL_V512_SAVE:
1151 case AMDGPU::SI_SPILL_V512_RESTORE:
1152 case AMDGPU::SI_SPILL_A512_SAVE:
1153 case AMDGPU::SI_SPILL_A512_RESTORE:
1154 case AMDGPU::SI_SPILL_AV512_SAVE:
1155 case AMDGPU::SI_SPILL_AV512_RESTORE:
1156 return 16;
1157 case AMDGPU::SI_SPILL_S384_SAVE:
1158 case AMDGPU::SI_SPILL_S384_RESTORE:
1159 case AMDGPU::SI_SPILL_V384_SAVE:
1160 case AMDGPU::SI_SPILL_V384_RESTORE:
1161 case AMDGPU::SI_SPILL_A384_SAVE:
1162 case AMDGPU::SI_SPILL_A384_RESTORE:
1163 case AMDGPU::SI_SPILL_AV384_SAVE:
1164 case AMDGPU::SI_SPILL_AV384_RESTORE:
1165 return 12;
1166 case AMDGPU::SI_SPILL_S352_SAVE:
1167 case AMDGPU::SI_SPILL_S352_RESTORE:
1168 case AMDGPU::SI_SPILL_V352_SAVE:
1169 case AMDGPU::SI_SPILL_V352_RESTORE:
1170 case AMDGPU::SI_SPILL_A352_SAVE:
1171 case AMDGPU::SI_SPILL_A352_RESTORE:
1172 case AMDGPU::SI_SPILL_AV352_SAVE:
1173 case AMDGPU::SI_SPILL_AV352_RESTORE:
1174 return 11;
1175 case AMDGPU::SI_SPILL_S320_SAVE:
1176 case AMDGPU::SI_SPILL_S320_RESTORE:
1177 case AMDGPU::SI_SPILL_V320_SAVE:
1178 case AMDGPU::SI_SPILL_V320_RESTORE:
1179 case AMDGPU::SI_SPILL_A320_SAVE:
1180 case AMDGPU::SI_SPILL_A320_RESTORE:
1181 case AMDGPU::SI_SPILL_AV320_SAVE:
1182 case AMDGPU::SI_SPILL_AV320_RESTORE:
1183 return 10;
1184 case AMDGPU::SI_SPILL_S288_SAVE:
1185 case AMDGPU::SI_SPILL_S288_RESTORE:
1186 case AMDGPU::SI_SPILL_V288_SAVE:
1187 case AMDGPU::SI_SPILL_V288_RESTORE:
1188 case AMDGPU::SI_SPILL_A288_SAVE:
1189 case AMDGPU::SI_SPILL_A288_RESTORE:
1190 case AMDGPU::SI_SPILL_AV288_SAVE:
1191 case AMDGPU::SI_SPILL_AV288_RESTORE:
1192 return 9;
1193 case AMDGPU::SI_SPILL_S256_SAVE:
1194 case AMDGPU::SI_SPILL_S256_RESTORE:
1195 case AMDGPU::SI_SPILL_V256_SAVE:
1196 case AMDGPU::SI_SPILL_V256_RESTORE:
1197 case AMDGPU::SI_SPILL_A256_SAVE:
1198 case AMDGPU::SI_SPILL_A256_RESTORE:
1199 case AMDGPU::SI_SPILL_AV256_SAVE:
1200 case AMDGPU::SI_SPILL_AV256_RESTORE:
1201 return 8;
1202 case AMDGPU::SI_SPILL_S224_SAVE:
1203 case AMDGPU::SI_SPILL_S224_RESTORE:
1204 case AMDGPU::SI_SPILL_V224_SAVE:
1205 case AMDGPU::SI_SPILL_V224_RESTORE:
1206 case AMDGPU::SI_SPILL_A224_SAVE:
1207 case AMDGPU::SI_SPILL_A224_RESTORE:
1208 case AMDGPU::SI_SPILL_AV224_SAVE:
1209 case AMDGPU::SI_SPILL_AV224_RESTORE:
1210 return 7;
1211 case AMDGPU::SI_SPILL_S192_SAVE:
1212 case AMDGPU::SI_SPILL_S192_RESTORE:
1213 case AMDGPU::SI_SPILL_V192_SAVE:
1214 case AMDGPU::SI_SPILL_V192_RESTORE:
1215 case AMDGPU::SI_SPILL_A192_SAVE:
1216 case AMDGPU::SI_SPILL_A192_RESTORE:
1217 case AMDGPU::SI_SPILL_AV192_SAVE:
1218 case AMDGPU::SI_SPILL_AV192_RESTORE:
1219 return 6;
1220 case AMDGPU::SI_SPILL_S160_SAVE:
1221 case AMDGPU::SI_SPILL_S160_RESTORE:
1222 case AMDGPU::SI_SPILL_V160_SAVE:
1223 case AMDGPU::SI_SPILL_V160_RESTORE:
1224 case AMDGPU::SI_SPILL_A160_SAVE:
1225 case AMDGPU::SI_SPILL_A160_RESTORE:
1226 case AMDGPU::SI_SPILL_AV160_SAVE:
1227 case AMDGPU::SI_SPILL_AV160_RESTORE:
1228 return 5;
1229 case AMDGPU::SI_SPILL_S128_SAVE:
1230 case AMDGPU::SI_SPILL_S128_RESTORE:
1231 case AMDGPU::SI_SPILL_V128_SAVE:
1232 case AMDGPU::SI_SPILL_V128_RESTORE:
1233 case AMDGPU::SI_SPILL_A128_SAVE:
1234 case AMDGPU::SI_SPILL_A128_RESTORE:
1235 case AMDGPU::SI_SPILL_AV128_SAVE:
1236 case AMDGPU::SI_SPILL_AV128_RESTORE:
1237 return 4;
1238 case AMDGPU::SI_SPILL_S96_SAVE:
1239 case AMDGPU::SI_SPILL_S96_RESTORE:
1240 case AMDGPU::SI_SPILL_V96_SAVE:
1241 case AMDGPU::SI_SPILL_V96_RESTORE:
1242 case AMDGPU::SI_SPILL_A96_SAVE:
1243 case AMDGPU::SI_SPILL_A96_RESTORE:
1244 case AMDGPU::SI_SPILL_AV96_SAVE:
1245 case AMDGPU::SI_SPILL_AV96_RESTORE:
1246 return 3;
1247 case AMDGPU::SI_SPILL_S64_SAVE:
1248 case AMDGPU::SI_SPILL_S64_RESTORE:
1249 case AMDGPU::SI_SPILL_V64_SAVE:
1250 case AMDGPU::SI_SPILL_V64_RESTORE:
1251 case AMDGPU::SI_SPILL_A64_SAVE:
1252 case AMDGPU::SI_SPILL_A64_RESTORE:
1253 case AMDGPU::SI_SPILL_AV64_SAVE:
1254 case AMDGPU::SI_SPILL_AV64_RESTORE:
1255 return 2;
1256 case AMDGPU::SI_SPILL_S32_SAVE:
1257 case AMDGPU::SI_SPILL_S32_RESTORE:
1258 case AMDGPU::SI_SPILL_V32_SAVE:
1259 case AMDGPU::SI_SPILL_V32_RESTORE:
1260 case AMDGPU::SI_SPILL_A32_SAVE:
1261 case AMDGPU::SI_SPILL_A32_RESTORE:
1262 case AMDGPU::SI_SPILL_AV32_SAVE:
1263 case AMDGPU::SI_SPILL_AV32_RESTORE:
1264 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1265 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1266 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1267 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1268 case AMDGPU::SI_SPILL_V16_SAVE:
1269 case AMDGPU::SI_SPILL_V16_RESTORE:
1270 return 1;
1272 }
1273}
1274
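// Opcode remapping helpers: convert MUBUF spill opcodes between the OFFEN
// (VGPR-addressed) and OFFSET (immediate-only) forms; -1 means no match.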
1276 switch (Opc) {
1277 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1278 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1279 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1280 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1281 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1282 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1283 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1284 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1285 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1286 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1287 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1288 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1289 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1290 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1291 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1292 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1293 default:
1294 return -1;
1295 }
1296}
1297
1299 switch (Opc) {
1300 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1301 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1302 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1303 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1304 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1305 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1306 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1307 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1308 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1309 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1310 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1311 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1312 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1313 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1314 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1315 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1316 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1317 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1318 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1319 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1320 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1321 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1322 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1323 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1324 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1325 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1326 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1327 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1328 default:
1329 return -1;
1330 }
1331}
1332
1334 switch (Opc) {
1335 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1336 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1337 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1338 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1339 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1340 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1341 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1342 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1343 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1344 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1345 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1346 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1347 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1348 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1349 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1350 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1351 default:
1352 return -1;
1353 }
1354}
1355
1357 switch (Opc) {
1358 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1359 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1360 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1361 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1362 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1363 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1364 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1365 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1366 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1367 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1368 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1369 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1370 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1371 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1372 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1373 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1374 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1375 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1376 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1377 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1378 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1379 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1380 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1381 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1382 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1383 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1384 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1385 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1386 default:
1387 return -1;
1388 }
1389}
1390
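// Spill or reload a single 32-bit lane directly between a VGPR and an AGPR
// (V_ACCVGPR_WRITE/READ, or a plain COPY when both sides are in the same
// register file) instead of going through scratch memory.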
1394 int Index, unsigned Lane,
1395 unsigned ValueReg, bool IsKill) {
1399
1401
1402 if (Reg == AMDGPU::NoRegister)
1404
1405 bool IsStore = MI->mayStore();
1407 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1408
1409 unsigned Dst = IsStore ? Reg : ValueReg;
1410 unsigned Src = IsStore ? ValueReg : Reg;
1411 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1413 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1414
1415
1416
1417
1418 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1421 return CopyMIB;
1422 }
1423 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1424 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1425
1429 return MIB;
1430}
1431
1432
1433
1437 int Index,
1442 bool IsStore = MI->mayStore();
1443
1444 unsigned Opc = MI->getOpcode();
1445 int LoadStoreOp = IsStore ?
1447 if (LoadStoreOp == -1)
1448 return false;
1449
1452 return true;
1453
1457 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1458 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1463
1465 AMDGPU::OpName::vdata_in);
1466 if (VDataIn)
1467 NewMI.add(*VDataIn);
1468 return true;
1469}
1470
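// Pick the flat-scratch (SCRATCH_*) load/store opcode whose width matches
// EltSize; block loads/stores keep their original opcode.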
1472 unsigned LoadStoreOp,
1473 unsigned EltSize) {
1474 bool IsStore = TII->get(LoadStoreOp).mayStore();
1476 bool UseST =
1478
1479
1480 if (TII->isBlockLoadStore(LoadStoreOp))
1481 return LoadStoreOp;
1482
1483 switch (EltSize) {
1484 case 4:
1485 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1486 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1487 break;
1488 case 8:
1489 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1490 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1491 break;
1492 case 12:
1493 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1494 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1495 break;
1496 case 16:
1497 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1498 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1499 break;
1500 default:
1502 }
1503
1504 if (HasVAddr)
1506 else if (UseST)
1508
1509 return LoadStoreOp;
1510}
1511
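// buildSpillLoadStore: expand a VGPR/AGPR spill or reload into real MUBUF or
// flat-scratch instructions, splitting wide register tuples into legal pieces
// and scavenging an SGPR or VGPR for the offset when the immediate offset is
// out of range.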
1514 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1517 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1518
1523
1525 bool IsStore = Desc->mayStore();
1526 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1527 bool IsBlock = TII->isBlockLoadStore(LoadStoreOp);
1528
1529 bool CanClobberSCC = false;
1530 bool Scavenged = false;
1531 MCRegister SOffset = ScratchOffsetReg;
1532
1534
1535 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1537
1538
1539
1540
1541 unsigned EltSize = IsBlock ? RegWidth
1542 : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u)
1543 : 4u;
1544 unsigned NumSubRegs = RegWidth / EltSize;
1545 unsigned Size = NumSubRegs * EltSize;
1546 unsigned RemSize = RegWidth - Size;
1547 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1549 int64_t MaterializedOffset = Offset;
1550
1551 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1552 int64_t ScratchOffsetRegDelta = 0;
1553
1554 if (IsFlat && EltSize > 4) {
1556 Desc = &TII->get(LoadStoreOp);
1557 }
1558
1561
1562 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1563 "unexpected VGPR spill offset");
1564
1565
1567
1568
1569 Register TmpIntermediateVGPR;
1570 bool UseVGPROffset = false;
1571
1572
1573
1574 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1575 int64_t VOffset) {
1576
1577 if (IsFlat && SGPRBase) {
1578
1579
1580
1581
1582 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1586 .addImm(0);
1587 } else {
1592 .addReg(TmpOffsetVGPR);
1593 }
1594 } else {
1595 assert(TmpOffsetVGPR);
1598 }
1599 };
1600
1601 bool IsOffsetLegal =
1604 : TII->isLegalMUBUFImmOffset(MaxOffset);
1605 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1607
1608
1609
1610
1611
1612 if (RS) {
1613 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1614
1615
1616 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1617 } else if (LiveUnits) {
1618 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1619 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1621 SOffset = Reg;
1622 break;
1623 }
1624 }
1625 }
1626
1627 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1629
1630 if (!SOffset) {
1631 UseVGPROffset = true;
1632
1633 if (RS) {
1634 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1635 } else {
1637 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1639 TmpOffsetVGPR = Reg;
1640 break;
1641 }
1642 }
1643 }
1644
1645 assert(TmpOffsetVGPR);
1646 } else if (!SOffset && CanClobberSCC) {
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657 if (!ScratchOffsetReg)
1659 SOffset = ScratchOffsetReg;
1660 ScratchOffsetRegDelta = Offset;
1661 } else {
1662 Scavenged = true;
1663 }
1664
1665
1666
1667
1668 if (!IsFlat && !UseVGPROffset)
1669 Offset *= ST.getWavefrontSize();
1670
1671 if (!UseVGPROffset && !SOffset)
1672 report_fatal_error("could not scavenge SGPR to spill in entry function");
1673
1674 if (UseVGPROffset) {
1675
1676 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1677 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1679 } else {
1682 .addReg(ScratchOffsetReg)
1684 Add->getOperand(3).setIsDead();
1685 }
1686
1688 }
1689
1690 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1691 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1692 && "Unexpected vaddr for flat scratch with a FI operand");
1693
1694 if (UseVGPROffset) {
1696 } else {
1697 assert(ST.hasFlatScratchSTMode());
1698 assert(!TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST");
1700 }
1701
1702 Desc = &TII->get(LoadStoreOp);
1703 }
1704
1705 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1706 ++i, RegOffset += EltSize) {
1707 if (i == NumSubRegs) {
1708 EltSize = RemSize;
1710 }
1711 Desc = &TII->get(LoadStoreOp);
1712
1713 if (!IsFlat && UseVGPROffset) {
1716 Desc = &TII->get(NewLoadStoreOp);
1717 }
1718
1719 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1720
1721
1722
1723
1724
1725
1726 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1727 }
1728
1729 unsigned NumRegs = EltSize / 4;
1731 ? ValueReg
1732 : Register(getSubReg(ValueReg,
1734
1735 unsigned SOffsetRegState = 0;
1737 const bool IsLastSubReg = i + 1 == e;
1738 const bool IsFirstSubReg = i == 0;
1739 if (IsLastSubReg) {
1741
1743 }
1744
1745
1746
1747 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1748 bool NeedSuperRegImpOperand = e > 1;
1749
1750
1751
1752 unsigned RemEltSize = EltSize;
1753
1754
1755
1756
1757
1758
1759
1760 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1761 LaneE = RegOffset / 4;
1762 Lane >= LaneE; --Lane) {
1763 bool IsSubReg = e > 1 || EltSize > 4;
1766 : ValueReg;
1768 if (!MIB.getInstr())
1769 break;
1770 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1772 NeedSuperRegDef = false;
1773 }
1774 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1775 NeedSuperRegImpOperand = true;
1776 unsigned State = SrcDstRegState;
1777 if (!IsLastSubReg || (Lane != LaneE))
1779 if (!IsFirstSubReg || (Lane != LaneS))
1782 }
1783 RemEltSize -= 4;
1784 }
1785
1786 if (!RemEltSize)
1787 continue;
1788
1789 if (RemEltSize != EltSize) {
1790 assert(IsFlat && EltSize > 4);
1791
1792 unsigned NumRegs = RemEltSize / 4;
1797 }
1798
1799 unsigned FinalReg = SubReg;
1800
1801 if (IsAGPR) {
1802 assert(EltSize == 4);
1803
1804 if (!TmpIntermediateVGPR) {
1807 }
1808 if (IsStore) {
1810 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1811 TmpIntermediateVGPR)
1813 if (NeedSuperRegDef)
1815 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1818 }
1819 SubReg = TmpIntermediateVGPR;
1820 } else if (UseVGPROffset) {
1821 if (!TmpOffsetVGPR) {
1822 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1823 MI, false, 0);
1824 RS->setRegUsed(TmpOffsetVGPR);
1825 }
1826 }
1827
1828 Register FinalValueReg = ValueReg;
1829 if (LoadStoreOp == AMDGPU::SCRATCH_LOAD_USHORT_SADDR) {
1830
1831
1832 ValueReg =
1833 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1835 IsKill = false;
1836 }
1837
1842
1843 auto MIB =
1846
1847 if (UseVGPROffset) {
1848
1849
1850 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1851 }
1852
1853 if (!IsFlat)
1855
1856 if (SOffset == AMDGPU::NoRegister) {
1857 if (!IsFlat) {
1858 if (UseVGPROffset && ScratchOffsetReg) {
1859 MIB.addReg(ScratchOffsetReg);
1860 } else {
1862 MIB.addImm(0);
1863 }
1864 }
1865 } else {
1866 MIB.addReg(SOffset, SOffsetRegState);
1867 }
1868
1870
1873
1874 if (!IsFlat)
1875 MIB.addImm(0);
1876 MIB.addMemOperand(NewMMO);
1877
1878 if (FinalValueReg != ValueReg) {
1879
1880 ValueReg = getSubReg(ValueReg, AMDGPU::lo16);
1886 ValueReg = FinalValueReg;
1887 }
1888
1889 if (!IsAGPR && NeedSuperRegDef)
1891
1892 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1893 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1894 FinalReg)
1897 }
1898
1900 bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore;
1901 if (NeedSuperRegImpOperand &&
1902 (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) {
1904 if (PartialReloadCopy)
1906 }
1929 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1930 MI->readsRegister(SubReg, this)) {
1932 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1933 }
1934
1935
1936
1937
1938
1939
1940 if (!IsStore && TII->isBlockLoadStore(LoadStoreOp))
1942 }
1943
1944 if (ScratchOffsetRegDelta != 0) {
1945
1948 .addImm(-ScratchOffsetRegDelta);
1949 }
1950}
1951
1957 Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
1958 for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)
1959 if (!(Mask & (1 << RegOffset)) &&
1960 isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))
1962}
1963
1965 int Offset, bool IsLoad,
1966 bool IsKill) const {
1967
1970
1975
1981
1982 if (IsLoad) {
1983 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1984 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1987 } else {
1988 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1989 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1992
1994 }
1995}
1996
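// spillSGPR: store each 32-bit piece of an SGPR into a reserved VGPR lane
// (SI_SPILL_S32_TO_VGPR); if no lanes were assigned, bounce the value through
// a temporary VGPR and write it to scratch memory instead.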
2000 bool SpillToPhysVGPRLane) const {
2001 assert(!MI->getOperand(0).isUndef() &&
2002 "undef spill should have been deleted earlier");
2003
2004 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
2005
2009 bool SpillToVGPR = !VGPRSpills.empty();
2010 if (OnlyToVGPR && !SpillToVGPR)
2011 return false;
2012
2015
2016 if (SpillToVGPR) {
2017
2018
2019
2020
2021
2023 "Num of SGPRs spilled should be less than or equal to num of "
2024 "the VGPR lanes.");
2025
2026 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
2032
2033 bool IsFirstSubreg = i == 0;
2034 bool IsLastSubreg = i == SB.NumSubRegs - 1;
2035 bool UseKill = SB.IsKill && IsLastSubreg;
2036
2037
2038
2039
2041 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
2044 .addReg(Spill.VGPR);
2045 if (Indexes) {
2046 if (IsFirstSubreg)
2048 else
2050 }
2051
2052 if (IsFirstSubreg && SB.NumSubRegs > 1) {
2053
2054
2056 }
2057
2058 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
2060
2061
2062
2063
2064 }
2065 } else {
2067
2068
2070
2071
2073
2076
2077
2078 for (unsigned i = Offset * PVD.PerVGPR,
2080 i < e; ++i) {
2085
2088 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
2090 .addImm(i % PVD.PerVGPR)
2092 TmpVGPRFlags = 0;
2093
2094 if (Indexes) {
2095 if (i == 0)
2097 else
2099 }
2100
2101
2102
2104
2105 unsigned SuperKillState = 0;
2109 }
2110 }
2111
2112
2114 }
2115
2117 }
2118
2119 MI->eraseFromParent();
2121
2122 if (LIS)
2124
2125 return true;
2126}
2127
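// restoreSGPR: the inverse of spillSGPR, reading each 32-bit piece back from
// its VGPR lane (SI_RESTORE_S32_FROM_VGPR) or reloading it from scratch.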
2131 bool SpillToPhysVGPRLane) const {
2132 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
2133
2137 bool SpillToVGPR = !VGPRSpills.empty();
2138 if (OnlyToVGPR && !SpillToVGPR)
2139 return false;
2140
2141 if (SpillToVGPR) {
2142 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
2147
2150 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
2152 .addImm(Spill.Lane);
2155 if (Indexes) {
2156 if (i == e - 1)
2158 else
2160 }
2161 }
2162 } else {
2164
2165
2167
2169
2171
2172
2173 for (unsigned i = Offset * PVD.PerVGPR,
2175 i < e; ++i) {
2180
2181 bool LastSubReg = (i + 1 == e);
2183 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
2188 if (Indexes) {
2189 if (i == e - 1)
2191 else
2193 }
2194 }
2195 }
2196
2198 }
2199
2200 MI->eraseFromParent();
2201
2202 if (LIS)
2204
2205 return true;
2206}
2207
2211 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
2212 RS);
2214
2219
2220 for (unsigned i = Offset * PVD.PerVGPR,
2222 i < e; ++i) {
2227
2229 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
2232 .addImm(i % PVD.PerVGPR)
2234 TmpVGPRFlags = 0;
2235
2236
2238
2239 unsigned SuperKillState = 0;
2243 }
2244 }
2245
2246 }
2247
2248
2250 SB.setMI(&RestoreMBB, MI);
2251
2253
2254
2255 for (unsigned i = Offset * PVD.PerVGPR,
2257 i < e; ++i) {
2262
2264 bool LastSubReg = (i + 1 == e);
2265 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2271 }
2272 }
2274
2276 return false;
2277}
2278
2279
2280
2281
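// Entry point for lowering only the SGPR spill pseudos
// (SI_SPILL_S*_SAVE/RESTORE) to their VGPR-lane or memory form; any other
// frame index is left untouched.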
2285 switch (MI->getOpcode()) {
2286 case AMDGPU::SI_SPILL_S1024_SAVE:
2287 case AMDGPU::SI_SPILL_S512_SAVE:
2288 case AMDGPU::SI_SPILL_S384_SAVE:
2289 case AMDGPU::SI_SPILL_S352_SAVE:
2290 case AMDGPU::SI_SPILL_S320_SAVE:
2291 case AMDGPU::SI_SPILL_S288_SAVE:
2292 case AMDGPU::SI_SPILL_S256_SAVE:
2293 case AMDGPU::SI_SPILL_S224_SAVE:
2294 case AMDGPU::SI_SPILL_S192_SAVE:
2295 case AMDGPU::SI_SPILL_S160_SAVE:
2296 case AMDGPU::SI_SPILL_S128_SAVE:
2297 case AMDGPU::SI_SPILL_S96_SAVE:
2298 case AMDGPU::SI_SPILL_S64_SAVE:
2299 case AMDGPU::SI_SPILL_S32_SAVE:
2300 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2301 case AMDGPU::SI_SPILL_S1024_RESTORE:
2302 case AMDGPU::SI_SPILL_S512_RESTORE:
2303 case AMDGPU::SI_SPILL_S384_RESTORE:
2304 case AMDGPU::SI_SPILL_S352_RESTORE:
2305 case AMDGPU::SI_SPILL_S320_RESTORE:
2306 case AMDGPU::SI_SPILL_S288_RESTORE:
2307 case AMDGPU::SI_SPILL_S256_RESTORE:
2308 case AMDGPU::SI_SPILL_S224_RESTORE:
2309 case AMDGPU::SI_SPILL_S192_RESTORE:
2310 case AMDGPU::SI_SPILL_S160_RESTORE:
2311 case AMDGPU::SI_SPILL_S128_RESTORE:
2312 case AMDGPU::SI_SPILL_S96_RESTORE:
2313 case AMDGPU::SI_SPILL_S64_RESTORE:
2314 case AMDGPU::SI_SPILL_S32_RESTORE:
2315 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2316 default:
2318 }
2319}
2320
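// eliminateFrameIndex: rewrite a frame-index operand into a frame register
// and/or immediate offset, expanding spill pseudos and patching V_ADD/S_ADD
// and MUBUF/flat-scratch addressing along the way.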
2322 int SPAdj, unsigned FIOperandNum,
2330
2331 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2332
2334 "unreserved scratch RSRC register");
2335
2337 int Index = MI->getOperand(FIOperandNum).getIndex();
2338
2342
2343 switch (MI->getOpcode()) {
2344
2345 case AMDGPU::SI_SPILL_S1024_SAVE:
2346 case AMDGPU::SI_SPILL_S512_SAVE:
2347 case AMDGPU::SI_SPILL_S384_SAVE:
2348 case AMDGPU::SI_SPILL_S352_SAVE:
2349 case AMDGPU::SI_SPILL_S320_SAVE:
2350 case AMDGPU::SI_SPILL_S288_SAVE:
2351 case AMDGPU::SI_SPILL_S256_SAVE:
2352 case AMDGPU::SI_SPILL_S224_SAVE:
2353 case AMDGPU::SI_SPILL_S192_SAVE:
2354 case AMDGPU::SI_SPILL_S160_SAVE:
2355 case AMDGPU::SI_SPILL_S128_SAVE:
2356 case AMDGPU::SI_SPILL_S96_SAVE:
2357 case AMDGPU::SI_SPILL_S64_SAVE:
2358 case AMDGPU::SI_SPILL_S32_SAVE: {
2360 }
2361
2362
2363 case AMDGPU::SI_SPILL_S1024_RESTORE:
2364 case AMDGPU::SI_SPILL_S512_RESTORE:
2365 case AMDGPU::SI_SPILL_S384_RESTORE:
2366 case AMDGPU::SI_SPILL_S352_RESTORE:
2367 case AMDGPU::SI_SPILL_S320_RESTORE:
2368 case AMDGPU::SI_SPILL_S288_RESTORE:
2369 case AMDGPU::SI_SPILL_S256_RESTORE:
2370 case AMDGPU::SI_SPILL_S224_RESTORE:
2371 case AMDGPU::SI_SPILL_S192_RESTORE:
2372 case AMDGPU::SI_SPILL_S160_RESTORE:
2373 case AMDGPU::SI_SPILL_S128_RESTORE:
2374 case AMDGPU::SI_SPILL_S96_RESTORE:
2375 case AMDGPU::SI_SPILL_S64_RESTORE:
2376 case AMDGPU::SI_SPILL_S32_RESTORE: {
2378 }
2379
2380
2381 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {
2382
2383 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
2384 AMDGPU::M0)
2385 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2386 [[fallthrough]];
2387 }
2388 case AMDGPU::SI_SPILL_V1024_SAVE:
2389 case AMDGPU::SI_SPILL_V512_SAVE:
2390 case AMDGPU::SI_SPILL_V384_SAVE:
2391 case AMDGPU::SI_SPILL_V352_SAVE:
2392 case AMDGPU::SI_SPILL_V320_SAVE:
2393 case AMDGPU::SI_SPILL_V288_SAVE:
2394 case AMDGPU::SI_SPILL_V256_SAVE:
2395 case AMDGPU::SI_SPILL_V224_SAVE:
2396 case AMDGPU::SI_SPILL_V192_SAVE:
2397 case AMDGPU::SI_SPILL_V160_SAVE:
2398 case AMDGPU::SI_SPILL_V128_SAVE:
2399 case AMDGPU::SI_SPILL_V96_SAVE:
2400 case AMDGPU::SI_SPILL_V64_SAVE:
2401 case AMDGPU::SI_SPILL_V32_SAVE:
2402 case AMDGPU::SI_SPILL_V16_SAVE:
2403 case AMDGPU::SI_SPILL_A1024_SAVE:
2404 case AMDGPU::SI_SPILL_A512_SAVE:
2405 case AMDGPU::SI_SPILL_A384_SAVE:
2406 case AMDGPU::SI_SPILL_A352_SAVE:
2407 case AMDGPU::SI_SPILL_A320_SAVE:
2408 case AMDGPU::SI_SPILL_A288_SAVE:
2409 case AMDGPU::SI_SPILL_A256_SAVE:
2410 case AMDGPU::SI_SPILL_A224_SAVE:
2411 case AMDGPU::SI_SPILL_A192_SAVE:
2412 case AMDGPU::SI_SPILL_A160_SAVE:
2413 case AMDGPU::SI_SPILL_A128_SAVE:
2414 case AMDGPU::SI_SPILL_A96_SAVE:
2415 case AMDGPU::SI_SPILL_A64_SAVE:
2416 case AMDGPU::SI_SPILL_A32_SAVE:
2417 case AMDGPU::SI_SPILL_AV1024_SAVE:
2418 case AMDGPU::SI_SPILL_AV512_SAVE:
2419 case AMDGPU::SI_SPILL_AV384_SAVE:
2420 case AMDGPU::SI_SPILL_AV352_SAVE:
2421 case AMDGPU::SI_SPILL_AV320_SAVE:
2422 case AMDGPU::SI_SPILL_AV288_SAVE:
2423 case AMDGPU::SI_SPILL_AV256_SAVE:
2424 case AMDGPU::SI_SPILL_AV224_SAVE:
2425 case AMDGPU::SI_SPILL_AV192_SAVE:
2426 case AMDGPU::SI_SPILL_AV160_SAVE:
2427 case AMDGPU::SI_SPILL_AV128_SAVE:
2428 case AMDGPU::SI_SPILL_AV96_SAVE:
2429 case AMDGPU::SI_SPILL_AV64_SAVE:
2430 case AMDGPU::SI_SPILL_AV32_SAVE:
2431 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2432 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2434 AMDGPU::OpName::vdata);
2436 MI->eraseFromParent();
2437 return true;
2438 }
2439
2440 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2442
2443 unsigned Opc;
2444 if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
2445 assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
2446 Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
2447 } else {
2448 Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
2449 ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
2450 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2451 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2452 }
2453
2454 auto *MBB = MI->getParent();
2455 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2456 if (IsWWMRegSpill) {
2458 RS->isRegUsed(AMDGPU::SCC));
2459 }
2462 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2463 *MI->memoperands_begin(), RS);
2465 if (IsWWMRegSpill)
2467
2468 MI->eraseFromParent();
2469 return true;
2470 }
2471 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {
2472
2473 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
2474 AMDGPU::M0)
2475 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2476 [[fallthrough]];
2477 }
2478 case AMDGPU::SI_SPILL_V16_RESTORE:
2479 case AMDGPU::SI_SPILL_V32_RESTORE:
2480 case AMDGPU::SI_SPILL_V64_RESTORE:
2481 case AMDGPU::SI_SPILL_V96_RESTORE:
2482 case AMDGPU::SI_SPILL_V128_RESTORE:
2483 case AMDGPU::SI_SPILL_V160_RESTORE:
2484 case AMDGPU::SI_SPILL_V192_RESTORE:
2485 case AMDGPU::SI_SPILL_V224_RESTORE:
2486 case AMDGPU::SI_SPILL_V256_RESTORE:
2487 case AMDGPU::SI_SPILL_V288_RESTORE:
2488 case AMDGPU::SI_SPILL_V320_RESTORE:
2489 case AMDGPU::SI_SPILL_V352_RESTORE:
2490 case AMDGPU::SI_SPILL_V384_RESTORE:
2491 case AMDGPU::SI_SPILL_V512_RESTORE:
2492 case AMDGPU::SI_SPILL_V1024_RESTORE:
2493 case AMDGPU::SI_SPILL_A32_RESTORE:
2494 case AMDGPU::SI_SPILL_A64_RESTORE:
2495 case AMDGPU::SI_SPILL_A96_RESTORE:
2496 case AMDGPU::SI_SPILL_A128_RESTORE:
2497 case AMDGPU::SI_SPILL_A160_RESTORE:
2498 case AMDGPU::SI_SPILL_A192_RESTORE:
2499 case AMDGPU::SI_SPILL_A224_RESTORE:
2500 case AMDGPU::SI_SPILL_A256_RESTORE:
2501 case AMDGPU::SI_SPILL_A288_RESTORE:
2502 case AMDGPU::SI_SPILL_A320_RESTORE:
2503 case AMDGPU::SI_SPILL_A352_RESTORE:
2504 case AMDGPU::SI_SPILL_A384_RESTORE:
2505 case AMDGPU::SI_SPILL_A512_RESTORE:
2506 case AMDGPU::SI_SPILL_A1024_RESTORE:
2507 case AMDGPU::SI_SPILL_AV32_RESTORE:
2508 case AMDGPU::SI_SPILL_AV64_RESTORE:
2509 case AMDGPU::SI_SPILL_AV96_RESTORE:
2510 case AMDGPU::SI_SPILL_AV128_RESTORE:
2511 case AMDGPU::SI_SPILL_AV160_RESTORE:
2512 case AMDGPU::SI_SPILL_AV192_RESTORE:
2513 case AMDGPU::SI_SPILL_AV224_RESTORE:
2514 case AMDGPU::SI_SPILL_AV256_RESTORE:
2515 case AMDGPU::SI_SPILL_AV288_RESTORE:
2516 case AMDGPU::SI_SPILL_AV320_RESTORE:
2517 case AMDGPU::SI_SPILL_AV352_RESTORE:
2518 case AMDGPU::SI_SPILL_AV384_RESTORE:
2519 case AMDGPU::SI_SPILL_AV512_RESTORE:
2520 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2521 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2522 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2524 AMDGPU::OpName::vdata);
2525 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2527
2528 unsigned Opc;
2529 if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
2530 assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
2531 Opc = ST.d16PreservesUnusedBits()
2532 ? AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16
2533 : AMDGPU::SCRATCH_LOAD_USHORT_SADDR;
2534 } else {
2535 Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
2536 ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
2537 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2538 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2539 }
2540
2541 auto *MBB = MI->getParent();
2542 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2543 if (IsWWMRegSpill) {
2545 RS->isRegUsed(AMDGPU::SCC));
2546 }
2547
2550 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2551 *MI->memoperands_begin(), RS);
2552
2553 if (IsWWMRegSpill)
2555
2556 MI->eraseFromParent();
2557 return true;
2558 }
2559 case AMDGPU::V_ADD_U32_e32:
2560 case AMDGPU::V_ADD_U32_e64:
2561 case AMDGPU::V_ADD_CO_U32_e32:
2562 case AMDGPU::V_ADD_CO_U32_e64: {
2563
2564 unsigned NumDefs = MI->getNumExplicitDefs();
2565 unsigned Src0Idx = NumDefs;
2566
2567 bool HasClamp = false;
2568 const MachineOperand *VCCOp = nullptr;
2569
2570 switch (MI->getOpcode()) {
2571 case AMDGPU::V_ADD_U32_e32:
2572 break;
2573 case AMDGPU::V_ADD_U32_e64:
2574 HasClamp = MI->getOperand(3).getImm();
2575 break;
2576 case AMDGPU::V_ADD_CO_U32_e32:
2577 VCCOp = &MI->getOperand(3);
2578 break;
2579 case AMDGPU::V_ADD_CO_U32_e64:
2580 VCCOp = &MI->getOperand(1);
2581 HasClamp = MI->getOperand(4).getImm();
2582 break;
2583 default:
2584 break;
2585 }
2586 bool DeadVCC = !VCCOp || VCCOp->isDead();
2589
2590 unsigned OtherOpIdx =
2591 FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
2593
2594 unsigned Src1Idx = Src0Idx + 1;
2595 Register MaterializedReg = FrameReg;
2597
2598 int64_t Offset = FrameInfo.getObjectOffset(Index);
2599
2600
2601
2602 if (OtherOp->isImm()) {
2603 int64_t TotalOffset = OtherOp->getImm() + Offset;
2604
2607
2608
2609
2610 break;
2611 }
2612
2613 OtherOp->setImm(TotalOffset);
2615 }
2616
2617 if (FrameReg && !ST.enableFlatScratch()) {
2618
2619
2620
2621
2622
2623
2624
2625 ScavengedVGPR = RS->scavengeRegisterBackwards(
2626 AMDGPU::VGPR_32RegClass, MI, false, 0);
2627
2628
2629
2632 .addImm(ST.getWavefrontSizeLog2())
2634 MaterializedReg = ScavengedVGPR;
2635 }
2636
2637 if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
2638 if (ST.enableFlatScratch() &&
2639 !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
2640
2641
2642
2643
2644
2645
2646 if (!ScavengedVGPR) {
2647 ScavengedVGPR = RS->scavengeRegisterBackwards(
2648 AMDGPU::VGPR_32RegClass, MI, false,
2649 0);
2650 }
2651
2652 assert(ScavengedVGPR != DstReg);
2653
2654 BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
2655 .addReg(MaterializedReg,
2657 MaterializedReg = ScavengedVGPR;
2658 }
2659
2660
2661
2662
2665 if (NumDefs == 2)
2666 AddI32.add(MI->getOperand(1));
2667
2668 unsigned MaterializedRegFlags =
2670
2671 if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
2672
2673
2674 AddI32
2675 .add(*OtherOp)
2676 .addReg(MaterializedReg, MaterializedRegFlags);
2677 } else {
2678
2679
2680 AddI32
2681 .addReg(MaterializedReg, MaterializedRegFlags)
2682 .add(*OtherOp);
2683 }
2684
2685 if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
2686 MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
2687 AddI32.addImm(0);
2688
2689 if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
2690 AddI32.setOperandDead(3);
2691
2692 MaterializedReg = DstReg;
2693
2698 } else if (Offset != 0) {
2699 assert(!MaterializedReg);
2702 } else {
2703 if (DeadVCC && !HasClamp) {
2705
2706
2707
2708 if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
2709
2710 MI->eraseFromParent();
2711 return true;
2712 }
2713
2714
2715 MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
2716 MI->removeOperand(FIOperandNum);
2717
2718 unsigned NumOps = MI->getNumOperands();
2719 for (unsigned I = NumOps - 2; I >= NumDefs + 1; --I)
2720 MI->removeOperand(I);
2721
2722 if (NumDefs == 2)
2723 MI->removeOperand(1);
2724
2725
2726 return true;
2727 }
2728
2729
2730
2732 }
2733
2734
2735 if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
2737 std::swap(FIOperandNum, OtherOpIdx);
2738 }
2739
2740
2741
2742
2743 for (unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
2744 if (!TII->isOperandLegal(*MI, SrcIdx)) {
2745
2746
2747
2748 if (!ScavengedVGPR) {
2749 ScavengedVGPR = RS->scavengeRegisterBackwards(
2750 AMDGPU::VGPR_32RegClass, MI, false,
2751 0);
2752 }
2753
2754 assert(ScavengedVGPR != DstReg);
2755
2757 BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
2758 .add(Src);
2759
2760 Src.ChangeToRegister(ScavengedVGPR, false);
2761 Src.setIsKill(true);
2762 break;
2763 }
2764 }
2765
2766
2767 if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
2768 if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
2770 }
2771
2772 MI->eraseFromParent();
2773 }
2774
2775 return true;
2776 }
2777 case AMDGPU::S_ADD_I32:
2778 case AMDGPU::S_ADD_U32: {
2779
2780 unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
2782
2784
2787 Register MaterializedReg = FrameReg;
2788
2789
2790 bool DeadSCC = MI->getOperand(3).isDead();
2791
2793
2794
2795
2798
2799 if (FrameReg && !ST.enableFlatScratch()) {
2800
2801
2802
2803 if (!TmpReg)
2804 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2805 MI, false, 0,
2806 false);
2807 if (TmpReg) {
2811 .addImm(ST.getWavefrontSizeLog2())
2813 }
2814 MaterializedReg = TmpReg;
2815 }
2816
2817 int64_t Offset = FrameInfo.getObjectOffset(Index);
2818
2819
2820
2821
2822 if (OtherOp.isImm()) {
2825
2826 if (MaterializedReg)
2828 else
2830 } else if (MaterializedReg) {
2831
2833
2834 if (!TmpReg && MaterializedReg == FrameReg) {
2835 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2836 MI, false, 0,
2837 false);
2838 DstReg = TmpReg;
2839 }
2840
2841 if (TmpReg) {
2845 .add(OtherOp);
2846 if (DeadSCC)
2848
2849 MaterializedReg = DstReg;
2850
2854 }
2856 } else {
2857
2858
2860 }
2861
2862 if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
2864 MI->removeOperand(3);
2865 MI->removeOperand(OtherOpIdx);
2866 MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2867 } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
2869 MI->removeOperand(3);
2870 MI->removeOperand(FIOperandNum);
2871 MI->setDesc(
2872 TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2873 }
2874
2876 return true;
2877 }
2878 default: {
2879 break;
2880 }
2881 }
2882
2883 int64_t Offset = FrameInfo.getObjectOffset(Index);
2884 if (ST.enableFlatScratch()) {
2885 if (TII->isFLATScratch(*MI)) {
2887 (int16_t)FIOperandNum ==
2888 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
2889
2890
2891 if (FrameReg)
2893
2895 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2899 OffsetOp->setImm(NewOffset);
2900 if (FrameReg)
2901 return false;
2903 }
2904
2906 unsigned Opc = MI->getOpcode();
2907 int NewOpc = -1;
2910 } else if (ST.hasFlatScratchSTMode()) {
2911
2912
2914 }
2915
2916 if (NewOpc != -1) {
2917
2918
2919 int VDstIn =
2920 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
2921 bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
2922 MI->getOperand(VDstIn).isTied();
2923 if (TiedVDst)
2924 MI->untieRegOperand(VDstIn);
2925
2926 MI->removeOperand(
2927 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2928
2929 if (TiedVDst) {
2930 int NewVDst =
2931 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2932 int NewVDstIn =
2933 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2934 assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2935 MI->tieOperands(NewVDst, NewVDstIn);
2936 }
2937 MI->setDesc(TII->get(NewOpc));
2938 return false;
2939 }
2940 }
2941 }
2942
2943 if (!FrameReg) {
2944 FIOp->ChangeToImmediate(Offset);
2945 if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
2946 return false;
2947 }
2948
2949
2950
2952 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
2953
2954 if (!Offset && FrameReg && UseSGPR) {
2955 FIOp->setReg(FrameReg);
2956 return false;
2957 }
2958
2960 UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
2961
2963 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2964 FIOp->setReg(TmpReg);
2966
2967 if ((!FrameReg || !Offset) && TmpReg) {
2968 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2970 if (FrameReg)
2971 MIB.addReg(FrameReg);
2972 else
2974
2975 return false;
2976 }
2977
2978 bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2979 !MI->definesRegister(AMDGPU::SCC, nullptr);
2980
2982 Register TmpSReg = UseSGPR ? TmpReg
2983 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2984 MI, false, 0, !UseSGPR);
2985
2986
2987
2988 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2990
2991 if (!TmpSReg) {
2992
2993 TmpSReg = FrameReg;
2994 FIOp->setReg(FrameReg);
2996 }
2997
2998 if (NeedSaveSCC) {
2999 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
3009 } else {
3013 }
3014
3015 if (!UseSGPR)
3018
3019 if (TmpSReg == FrameReg) {
3020
3021 if (NeedSaveSCC &&
3022 !MI->registerDefIsDead(AMDGPU::SCC, nullptr)) {
3025 TmpSReg)
3028 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
3031 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
3032 TmpSReg)
3035 } else {
3037 FrameReg)
3040 }
3041 }
3042
3043 return false;
3044 }
3045
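// Non-flat-scratch handling starts here. MUBUF scratch accesses can carry the
// frame register in soffset, so they are handled separately from the generic
// case that materializes a scaled address in a scavenged VGPR or SGPR.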
3046 bool IsMUBUF = TII->isMUBUF(*MI);
3047
3049
3050
3051 bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));
3052 bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
3053 !MI->definesRegister(AMDGPU::SCC, nullptr);
3055 const TargetRegisterClass *RC = IsSALU && !LiveSCC ? &AMDGPU::SReg_32RegClass
3056 : &AMDGPU::VGPR_32RegClass;
3057 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
3058 MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
3059 MI->getOpcode() == AMDGPU::S_MOV_B32;
3061 Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
3062 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
3063
3064 int64_t Offset = FrameInfo.getObjectOffset(Index);
3066 unsigned OpCode =
3067 IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
3068 Register TmpResultReg = ResultReg;
3069 if (IsSALU && LiveSCC) {
3070 TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
3071 MI, false, 0);
3072 }
3073
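// When the folded offset is zero, the lane-relative frame address is just the
// frame register shifted right by the wavefront-size log2: S_LSHR_B32 when an
// SGPR result is wanted and SCC may be clobbered, V_LSHRREV_B32 otherwise.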
3074 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
3075 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
3076
3077
3078 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
3079 else
3080 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
3081 if (IsSALU && !LiveSCC)
3082 Shift.getInstr()->getOperand(3).setIsDead();
3083 if (IsSALU && LiveSCC) {
3085 if (IsCopy) {
3087 NewDest = ResultReg;
3088 } else {
3089 NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3090 Shift, false, 0);
3091 }
3092 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
3093 .addReg(TmpResultReg);
3094 ResultReg = NewDest;
3095 }
3096 } else {
3098 if (!IsSALU) {
3099 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
3100 nullptr) {
3101
3102 Register ScaledReg = ResultReg;
3103
3104 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
3105 ScaledReg)
3106 .addImm(ST.getWavefrontSizeLog2())
3108
3109 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
3110
3111
3112 if (IsVOP2 ||
3114
3117 if (!IsVOP2)
3118 MIB.addImm(0);
3119 } else {
3121 "Need to reuse carry out register");
3122
3123
3125 if (!isWave32)
3126 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
3127 else
3128 ConstOffsetReg = MIB.getReg(1);
3129
3131 ConstOffsetReg)
3135 MIB.addImm(0);
3136 }
3137 }
3138 }
3139 if (!MIB || IsSALU) {
3140
3141
3142
3143
3144
3145
3146 Register TmpScaledReg = IsCopy && IsSALU
3147 ? ResultReg
3148 : RS->scavengeRegisterBackwards(
3149 AMDGPU::SReg_32_XM0RegClass, MI,
3150 false, 0, false);
3151 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
3152 Register TmpResultReg = ScaledReg;
3153
3154 if (!LiveSCC) {
3155 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
3157 .addImm(ST.getWavefrontSizeLog2());
3158 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
3161 } else {
3162 TmpResultReg = RS->scavengeRegisterBackwards(
3163 AMDGPU::VGPR_32RegClass, MI, false, 0, true);
3164
3166 if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
3168 TmpResultReg)
3169 .addImm(ST.getWavefrontSizeLog2())
3171 if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
3176 .addImm(0);
3177 } else
3179 } else {
3181 "offset is unsafe for v_mad_u32_u24");
3182
3183
3184
3185
3186
3187
3188
3189
3190 bool IsInlinableLiteral =
3192 if (!IsInlinableLiteral) {
3194 TmpResultReg)
3196 }
3197
3199 TmpResultReg);
3200
3201 if (!IsInlinableLiteral) {
3203 } else {
3204
3206 }
3207 Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
3209 TmpResultReg)
3210 .addImm(ST.getWavefrontSizeLog2())
3211 .addReg(TmpResultReg);
3212 }
3213
3215 if (IsCopy) {
3216 NewDest = ResultReg;
3217 } else {
3218 NewDest = RS->scavengeRegisterBackwards(
3219 AMDGPU::SReg_32_XM0RegClass, *Add, false, 0,
3220 true);
3221 }
3222
3224 NewDest)
3225 .addReg(TmpResultReg);
3226 ResultReg = NewDest;
3227 }
3228 if (!IsSALU)
3231 else
3232 ResultReg = TmpResultReg;
3233
3234 if (!TmpScaledReg.isValid()) {
3240 .addImm(ST.getWavefrontSizeLog2());
3241 }
3242 }
3243 }
3244
3245
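// Illustrative example (hypothetical MIR, not from this file): a frame
// address copy such as
//   %dst:vgpr_32 = V_MOV_B32_e32 %stack.0
// is rewritten so that %dst receives the scaled frame address directly, and
// the now-redundant mov is erased just below.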
3246 if (IsCopy) {
3247 MI->eraseFromParent();
3248 return true;
3249 }
3251 return false;
3252 }
3253
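// MUBUF scratch access: the frame register moves into soffset and the object
// offset is folded into the immediate offset field, provided the combined
// value stays within the legal MUBUF offset range; otherwise fall through and
// materialize the offset.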
3254 if (IsMUBUF) {
3255
3257 assert(static_cast<int>(FIOperandNum) ==
3258 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
3259
3260 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
3261 assert((SOffset.isImm() && SOffset.getImm() == 0));
3262
3263 if (FrameReg != AMDGPU::NoRegister)
3264 SOffset.ChangeToRegister(FrameReg, false);
3265
3266 int64_t Offset = FrameInfo.getObjectOffset(Index);
3267 int64_t OldImm =
3268 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
3269 int64_t NewOffset = OldImm + Offset;
3270
3271 if (TII->isLegalMUBUFImmOffset(NewOffset) && buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
3273 MI->eraseFromParent();
3274 return true;
3275 }
3276 }
3277
3278
3279
3280
3282 if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
3284 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
3288 }
3289
3290 return false;
3291}
3292
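// The bit-width helpers that follow map a register size in bits to the
// narrowest register class of each kind (VGPR, AGPR, AV superclass, SGPR),
// in both the default and the 64-bit-aligned "Align2" flavors used on
// subtargets that require aligned VGPR tuples.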
3296
3300
3304
3308 return &AMDGPU::VReg_64RegClass;
3310 return &AMDGPU::VReg_96RegClass;
3312 return &AMDGPU::VReg_128RegClass;
3314 return &AMDGPU::VReg_160RegClass;
3316 return &AMDGPU::VReg_192RegClass;
3318 return &AMDGPU::VReg_224RegClass;
3320 return &AMDGPU::VReg_256RegClass;
3322 return &AMDGPU::VReg_288RegClass;
3324 return &AMDGPU::VReg_320RegClass;
3326 return &AMDGPU::VReg_352RegClass;
3328 return &AMDGPU::VReg_384RegClass;
3330 return &AMDGPU::VReg_512RegClass;
3332 return &AMDGPU::VReg_1024RegClass;
3333
3334 return nullptr;
3335}
3336
3340 return &AMDGPU::VReg_64_Align2RegClass;
3342 return &AMDGPU::VReg_96_Align2RegClass;
3344 return &AMDGPU::VReg_128_Align2RegClass;
3346 return &AMDGPU::VReg_160_Align2RegClass;
3348 return &AMDGPU::VReg_192_Align2RegClass;
3350 return &AMDGPU::VReg_224_Align2RegClass;
3352 return &AMDGPU::VReg_256_Align2RegClass;
3354 return &AMDGPU::VReg_288_Align2RegClass;
3356 return &AMDGPU::VReg_320_Align2RegClass;
3358 return &AMDGPU::VReg_352_Align2RegClass;
3360 return &AMDGPU::VReg_384_Align2RegClass;
3362 return &AMDGPU::VReg_512_Align2RegClass;
3364 return &AMDGPU::VReg_1024_Align2RegClass;
3365
3366 return nullptr;
3367}
3368
3372 return &AMDGPU::VReg_1RegClass;
3374 return &AMDGPU::VGPR_16RegClass;
3376 return &AMDGPU::VGPR_32RegClass;
3379}
3380
3384 return &AMDGPU::VGPR_32_Lo256RegClass;
3386 return &AMDGPU::VReg_64_Lo256_Align2RegClass;
3388 return &AMDGPU::VReg_96_Lo256_Align2RegClass;
3390 return &AMDGPU::VReg_128_Lo256_Align2RegClass;
3392 return &AMDGPU::VReg_160_Lo256_Align2RegClass;
3394 return &AMDGPU::VReg_192_Lo256_Align2RegClass;
3396 return &AMDGPU::VReg_224_Lo256_Align2RegClass;
3398 return &AMDGPU::VReg_256_Lo256_Align2RegClass;
3400 return &AMDGPU::VReg_288_Lo256_Align2RegClass;
3402 return &AMDGPU::VReg_320_Lo256_Align2RegClass;
3404 return &AMDGPU::VReg_352_Lo256_Align2RegClass;
3406 return &AMDGPU::VReg_384_Lo256_Align2RegClass;
3408 return &AMDGPU::VReg_512_Lo256_Align2RegClass;
3410 return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
3411
3412 return nullptr;
3413}
3414
3418 return &AMDGPU::AReg_64RegClass;
3420 return &AMDGPU::AReg_96RegClass;
3422 return &AMDGPU::AReg_128RegClass;
3424 return &AMDGPU::AReg_160RegClass;
3426 return &AMDGPU::AReg_192RegClass;
3428 return &AMDGPU::AReg_224RegClass;
3430 return &AMDGPU::AReg_256RegClass;
3432 return &AMDGPU::AReg_288RegClass;
3434 return &AMDGPU::AReg_320RegClass;
3436 return &AMDGPU::AReg_352RegClass;
3438 return &AMDGPU::AReg_384RegClass;
3440 return &AMDGPU::AReg_512RegClass;
3442 return &AMDGPU::AReg_1024RegClass;
3443
3444 return nullptr;
3445}
3446
3450 return &AMDGPU::AReg_64_Align2RegClass;
3452 return &AMDGPU::AReg_96_Align2RegClass;
3454 return &AMDGPU::AReg_128_Align2RegClass;
3456 return &AMDGPU::AReg_160_Align2RegClass;
3458 return &AMDGPU::AReg_192_Align2RegClass;
3460 return &AMDGPU::AReg_224_Align2RegClass;
3462 return &AMDGPU::AReg_256_Align2RegClass;
3464 return &AMDGPU::AReg_288_Align2RegClass;
3466 return &AMDGPU::AReg_320_Align2RegClass;
3468 return &AMDGPU::AReg_352_Align2RegClass;
3470 return &AMDGPU::AReg_384_Align2RegClass;
3472 return &AMDGPU::AReg_512_Align2RegClass;
3474 return &AMDGPU::AReg_1024_Align2RegClass;
3475
3476 return nullptr;
3477}
3478
3482 return &AMDGPU::AGPR_LO16RegClass;
3484 return &AMDGPU::AGPR_32RegClass;
3487}
3488
3492 return &AMDGPU::AV_64RegClass;
3494 return &AMDGPU::AV_96RegClass;
3496 return &AMDGPU::AV_128RegClass;
3498 return &AMDGPU::AV_160RegClass;
3500 return &AMDGPU::AV_192RegClass;
3502 return &AMDGPU::AV_224RegClass;
3504 return &AMDGPU::AV_256RegClass;
3506 return &AMDGPU::AV_288RegClass;
3508 return &AMDGPU::AV_320RegClass;
3510 return &AMDGPU::AV_352RegClass;
3512 return &AMDGPU::AV_384RegClass;
3514 return &AMDGPU::AV_512RegClass;
3516 return &AMDGPU::AV_1024RegClass;
3517
3518 return nullptr;
3519}
3520
3524 return &AMDGPU::AV_64_Align2RegClass;
3526 return &AMDGPU::AV_96_Align2RegClass;
3528 return &AMDGPU::AV_128_Align2RegClass;
3530 return &AMDGPU::AV_160_Align2RegClass;
3532 return &AMDGPU::AV_192_Align2RegClass;
3534 return &AMDGPU::AV_224_Align2RegClass;
3536 return &AMDGPU::AV_256_Align2RegClass;
3538 return &AMDGPU::AV_288_Align2RegClass;
3540 return &AMDGPU::AV_320_Align2RegClass;
3542 return &AMDGPU::AV_352_Align2RegClass;
3544 return &AMDGPU::AV_384_Align2RegClass;
3546 return &AMDGPU::AV_512_Align2RegClass;
3548 return &AMDGPU::AV_1024_Align2RegClass;
3549
3550 return nullptr;
3551}
3552
3556 return &AMDGPU::AV_32RegClass;
3557 return ST.needsAlignedVGPRs()
3560}
3561
3564
3565
3566
3567
3568
3571}
3572
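// getSGPRClassForBitWidth: 32- and 64-bit widths map to SReg_32 / SReg_64,
// while wider widths map to the SGPR_96 .. SGPR_1024 tuple classes.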
3576 return &AMDGPU::SReg_32RegClass;
3578 return &AMDGPU::SReg_64RegClass;
3580 return &AMDGPU::SGPR_96RegClass;
3582 return &AMDGPU::SGPR_128RegClass;
3584 return &AMDGPU::SGPR_160RegClass;
3586 return &AMDGPU::SGPR_192RegClass;
3588 return &AMDGPU::SGPR_224RegClass;
3590 return &AMDGPU::SGPR_256RegClass;
3592 return &AMDGPU::SGPR_288RegClass;
3594 return &AMDGPU::SGPR_320RegClass;
3596 return &AMDGPU::SGPR_352RegClass;
3598 return &AMDGPU::SGPR_384RegClass;
3600 return &AMDGPU::SGPR_512RegClass;
3602 return &AMDGPU::SGPR_1024RegClass;
3603
3604 return nullptr;
3605}
3606
3610 if (Reg.isVirtual())
3611 RC = MRI.getRegClass(Reg);
3612 else
3613 RC = getPhysRegBaseClass(Reg);
3615}
3616
3619 unsigned Size = getRegSizeInBits(*SRC);
3620
3621 switch (SRC->getID()) {
3622 default:
3623 break;
3624 case AMDGPU::VS_32_Lo256RegClassID:
3625 case AMDGPU::VS_64_Lo256RegClassID:
3627 }
3628
3631 assert(VRC && "Invalid register class size");
3632 return VRC;
3633}
3634
3637 unsigned Size = getRegSizeInBits(*SRC);
3639 assert(ARC && "Invalid register class size");
3640 return ARC;
3641}
3642
3645 unsigned Size = getRegSizeInBits(*SRC);
3647 assert(ARC && "Invalid register class size");
3648 return ARC;
3649}
3650
3653 unsigned Size = getRegSizeInBits(*VRC);
3654 if (Size == 32)
3655 return &AMDGPU::SGPR_32RegClass;
3657 assert(SRC && "Invalid register class size");
3658 return SRC;
3659}
3660
3664 unsigned SubIdx) const {
3665
3667 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
3668 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
3669}
3670
3674 return !ST.hasMFMAInlineLiteralBug();
3675
3678}
3679
3685
3686
3687
3688
3689
3692 const MachineFunction &MF, bool ReserveHighestRegister) const {
3693 if (ReserveHighestRegister) {
3695 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
3696 return Reg;
3697 } else {
3699 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
3700 return Reg;
3701 }
3703}
3704
3708 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
3709 if (!RB)
3710 return false;
3711
3713}
3714
3716 unsigned EltSize) const {
3718 assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);
3719
3720 const unsigned RegHalves = RegBitWidth / 16;
3721 const unsigned EltHalves = EltSize / 2;
3722 assert(RegSplitParts.size() + 1 >= EltHalves);
3723
3724 const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
3725 const unsigned NumParts = RegHalves / EltHalves;
3726
3727 return ArrayRef(Parts.data(), NumParts);
3728}
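// For example (illustrative): splitting a 128-bit class into 64-bit elements
// (EltSize == 8) gives RegHalves == 8 and EltHalves == 4, so this returns the
// two 64-bit sub-register indices covering sub0_sub1 and sub2_sub3.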
3729
3733 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3734}
3735
3740 return getSubRegisterClass(SrcRC, MO.getSubReg());
3741}
3742
3749
3757
3760 unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
3761 switch (RC->getID()) {
3762 default:
3763 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3764 case AMDGPU::VGPR_32RegClassID:
3765 return std::min(
3766 ST.getMaxNumVGPRs(
3767 MinOcc,
3769 ST.getMaxNumVGPRs(MF));
3770 case AMDGPU::SGPR_32RegClassID:
3771 case AMDGPU::SGPR_LO16RegClassID:
3772 return std::min(ST.getMaxNumSGPRs(MinOcc, true), ST.getMaxNumSGPRs(MF));
3773 }
3774}
3775
3777 unsigned Idx) const {
3778 switch (static_castAMDGPU::RegisterPressureSets\(Idx)) {
3779 case AMDGPU::RegisterPressureSets::VGPR_32:
3780 case AMDGPU::RegisterPressureSets::AGPR_32:
3783 case AMDGPU::RegisterPressureSets::SReg_32:
3786 }
3787
3789}
3790
3792 static const int Empty[] = { -1 };
3793
3794 if (RegPressureIgnoredUnits[static_cast<unsigned>(RegUnit)])
3796
3797 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3798}
3799
3806
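// Register-allocation hints for 16-bit values: when a virtual register is
// hinted to share a 32-bit VGPR with a paired 16-bit register, prefer the
// 32-bit register whose lo16 half matches the pair's current or mapped
// physical assignment.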
3809
3810 std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg);
3811
3812 switch (Hint.first) {
3814 Register Paired = Hint.second;
3818 PairedPhys =
3819 getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
3820 } else if (VRM && VRM->hasPhys(Paired)) {
3821 PairedPhys = getMatchingSuperReg(VRM->getPhys(Paired), AMDGPU::lo16,
3822 &AMDGPU::VGPR_32RegClass);
3823 }
3824
3825
3826 if (PairedPhys)
3827
3828
3830 return false;
3831 }
3833 Register Paired = Hint.second;
3837 PairedPhys = TRI->getSubReg(Paired, AMDGPU::lo16);
3838 } else if (VRM && VRM->hasPhys(Paired)) {
3839 PairedPhys = TRI->getSubReg(VRM->getPhys(Paired), AMDGPU::lo16);
3840 }
3841
3842
3843 if (PairedPhys)
3845 else {
3846
3847
3848
3849
3850
3851 for (MCPhysReg PhysReg : Order) {
3853 continue;
3854 if (AMDGPU::VGPR_16RegClass.contains(PhysReg) &&
3855 !MRI.isReserved(PhysReg))
3857 }
3858 }
3859 return false;
3860 }
3861 default:
3863 VRM);
3864 }
3865}
3866
3868
3869 return AMDGPU::SGPR30_SGPR31;
3870}
3871
3875 switch (RB.getID()) {
3876 case AMDGPU::VGPRRegBankID:
3878 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
3879 case AMDGPU::VCCRegBankID:
3882 case AMDGPU::SGPRRegBankID:
3884 case AMDGPU::AGPRRegBankID:
3886 default:
3888 }
3889}
3890
3897
3899 return getAllocatableClass(RC);
3900
3901 return nullptr;
3902}
3903
3905 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3906}
3907
3909 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3910}
3911
3913
3914 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3915 : &AMDGPU::VReg_64RegClass;
3916}
3917
3918
3926
3927 if (Reg.isVirtual()) {
3929 return nullptr;
3932 : MRI.getMaxLaneMaskForVReg(Reg);
3933 VNInfo *V = nullptr;
3935 for (auto &S : LI.subranges()) {
3936 if ((S.LaneMask & SubLanes) == SubLanes) {
3937 V = S.getVNInfoAt(UseIdx);
3938 break;
3939 }
3940 }
3941 } else {
3943 }
3944 if (!V)
3945 return nullptr;
3946 DefIdx = V->def;
3947 } else {
3948
3949 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3952 if (!DefIdx.isValid() ||
3955 DefIdx = V->def;
3956 } else {
3957 return nullptr;
3958 }
3959 }
3960 }
3961
3963
3964 if (!Def || !MDT.dominates(Def, &Use))
3965 return nullptr;
3966
3967 assert(Def->modifiesRegister(Reg, this));
3968
3969 return Def;
3970}
3971
3973 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3974
3976 AMDGPU::SReg_32RegClass,
3977 AMDGPU::AGPR_32RegClass } ) {
3978 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3979 return Super;
3980 }
3981 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3982 &AMDGPU::VGPR_32RegClass)) {
3983 return Super;
3984 }
3985
3986 return AMDGPU::NoRegister;
3987}
3988
3990 if (!ST.needsAlignedVGPRs())
3991 return true;
3992
4000
4001 assert(&RC != &AMDGPU::VS_64RegClass);
4002
4003 return true;
4004}
4005
4008 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
4009}
4010
4013 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
4014}
4015
4018 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
4019}
4020
4021unsigned
4023 unsigned SubReg) const {
4026 return std::min(128u, getSubRegIdxSize(SubReg));
4030 return std::min(32u, getSubRegIdxSize(SubReg));
4031 default:
4032 break;
4033 }
4034 return 0;
4035}
4036
4039 bool IncludeCalls) const {
4040 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
4042 (RC.getID() == AMDGPU::VGPR_32RegClassID)
4043 ? RC.getRegisters().take_front(NumArchVGPRs)
4046 if (MRI.isPhysRegUsed(Reg, !IncludeCalls))
4048 return 0;
4049}
4050
4058 return RegFlags;
4059}