LLVM: lib/Target/AMDGPU/SIFrameLowering.cpp Source File

//===----------------------- SIFrameLowering.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));

// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns a null Register.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}
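
// Usage sketch (added for illustration; not part of the original file): the
// prolog/epilog code below typically calls the helper like this, starting
// from the block live-ins and treating failure as fatal:
//
//   LiveRegUnits LiveUnits;
//   LiveUnits.init(*TRI);
//   LiveUnits.addLiveIns(MBB);
//   MCRegister TmpVGPR = findScratchNonCalleeSaveRegister(
//       MF.getRegInfo(), LiveUnits, AMDGPU::VGPR_32RegClass);
//   if (!TmpVGPR)
//     report_fatal_error("failed to find free scratch register");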

/// Query target location for spilling SGPRs.
/// \p IncludeScratchCopy : Also look for free scratch SGPRs.
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There's no free scratch SGPR, so spill the register into a
      // physical VGPR lane instead.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove the dead frame index.
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}
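
// Note (added for illustration; not part of the original file): the helper
// above implements a three-tier fallback for saving an SGPR such as FP or BP:
// (1) copy to a free scratch SGPR, (2) spill to a physical VGPR lane,
// (3) spill to scratch memory. A typical call for the frame pointer is:
//
//   getVGPRSpillLaneOrTempRegister(MF, LiveUnits, MFI->getFrameOffsetReg());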

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister
// would use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}
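
// Usage sketch (added for illustration; not part of the original file):
// buildPrologSpill and buildEpilogRestore are used as a matched pair on the
// same frame index, e.g. when saving and reloading a WWM VGPR:
//
//   // Prolog: store the VGPR to its frame index, addressed via FrameReg.
//   buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, I, DL, VGPR, FI,
//                    FrameReg);
//   ...
//   // Epilog: reload the same frame index back into the same VGPR.
//   buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, I, DL, VGPR, FI,
//                      FrameReg);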

static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    LiveUnits.init(TRI);
    if (IsProlog) {
      LiveUnits.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveUnits.addLiveOuts(MBB);
      LiveUnits.stepBackward(*MBBI);
    }
  }
}

namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the ones needed for
// FP, BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    MCRegister TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCRegister TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);

      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm
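
// Usage sketch (added for illustration; not part of the original file): the
// CSR spill code below drives the builder with one instance per saved SGPR,
// picking the method recorded in PrologEpilogSGPRSaveRestoreInfo:
//
//   for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
//     PrologEpilogSGPRSpillBuilder SB(Spill.first, Spill.second, MBB, MBBI,
//                                     DL, TII, TRI, LiveUnits, FrameReg);
//     SB.save();    // prolog; the epilog calls SB.restore() instead
//   }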

void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I, const DebugLoc &DL,
    Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT.
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find an unused register pair to load the flat scratch init into.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor.
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
              FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
            AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
          AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
}

static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I)) {
      return false;
    }
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
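
// Worked example (added for illustration; not part of the original file):
// with MUBUF scratch on a wave64 target the factor is 64, so a per-lane stack
// size of 16 bytes becomes an SP value of 16 * 64 = 1024, because the stack
// is swizzled across the wave's 64 lanes; with flat scratch the factor is 1:
//
//   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
//       .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));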

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks.

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object. This will return `Register()` in
  // cases where there are no actual uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with the
  // scratch wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (!mayReserveScratchForCWSR(MF)) {
    if (hasFP(MF)) {
      Register FPReg = MFI->getFrameOffsetReg();
      assert(FPReg != AMDGPU::FP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
    }
  } else {
    // When dynamic VGPRs are in use, reserve scratch space for the CWSR trap
    // handler to save VGPRs to: a stack object of alignment 4 is created for
    // it, the size is recorded with setScratchReservedForDynamicVGPRs(), and
    // the value is materialized, using an inline constant when
    // isInlinableLiteral32(..., ST.hasInv2PiInlineImm()) allows it.
    // ...
  }

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }

  if (ST.hasWaitXCnt()) {
    // Initialize the encoded register range used by XCNT waits:
    //   unsigned RegEncoding = ...;
    //   BuildMI(...).addImm(RegEncoding);
    // ...
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11). If the shader is
    // actually wave32 we have to modify the const_index_stride field of the
    // descriptor's third sub-reg (bits 22:21) to 0b10 (stride=32). The reason
    // the driver does this is that there can be cases where it presents two
    // shaders with different wave sizes (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers.
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and set up the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch pointer and don't waste sgprs/vgprs on the static scratch base.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  Addc->getOperand(3).setIsDead();
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::ScalablePredicateVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  }

  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}
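
// Usage sketch (added for illustration; not part of the original file): a
// typical WWM spill sequence brackets the stores between saving EXEC with all
// lanes enabled and restoring the original mask:
//
//   Register Saved = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
//                                         /*IsProlog=*/true,
//                                         /*EnableInactiveLanes=*/true);
//   // ... emit WWM spill stores here ...
//   unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
//   BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec())
//       .addReg(Saved, RegState::Kill);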

void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers, but save all lanes of callee-saved VGPRs. Due to this, we might
  // end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (!MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // If we have already saved some WWM CSR registers, then EXEC is already
    // -1 and we don't need to do anything else. Otherwise, set EXEC to -1
    // here.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                           /*EnableInactiveLanes*/ true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for FP: skip if FP is saved to a scratch SGPR (the
    // save has already been emitted); otherwise FP has been moved to a
    // temporary register, so spill that instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handling for FP: skip if FP needs to be restored from the
    // scratch SGPR; otherwise restore the FP value into a temporary register.
    // The frame pointer should be overwritten only at the end, when all other
    // spills are restored from the current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers, but restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole wave functions, EXEC is already -1 at this point, so the CSR
    // WWM registers can be restored right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    MachineInstr &Return = MBB.back();
    unsigned Opcode = Return.getOpcode();
    switch (Opcode) {
    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
      Opcode = AMDGPU::SI_RETURN;
      break;
    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
      Opcode = AMDGPU::SI_TCRETURN_GFX;
      break;
    default:
      llvm_unreachable("Unexpected return inst");
    }
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
      BuildMI(MBB, MBBI, DL, TII->get(XorOpc), TRI.getExec())
          .addReg(OrigExec)
          .addImm(-1);
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore original EXEC.
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);

    // Drop the first operand and update the opcode.
    Return.removeOperand(0);
    Return.setDesc(TII->get(Opcode));

    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  LiveRegUnits LiveUnits;

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is
  // used to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions called via amdgpu_cs_chain rely on their callers to
    // initialize the SP.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spilled
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");

  if (FuncInfo->isWholeWaveFunction()) {
    // SI_WHOLE_WAVE_FUNC_SETUP has served its purpose; remove it now.
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  }
}
1357
1362 return;
1363
1369
1370
1373 if (.empty()) {
1374 MBBI = MBB.getLastNonDebugInstr();
1376 DL = MBBI->getDebugLoc();
1377
1378 MBBI = MBB.getFirstTerminator();
1379 }
1380
1385 : NumBytes;
1389
1390 if (RoundedSize != 0) {
1391 if (TRI.hasBasePointer(MF)) {
1395 } else if (hasFP(MF)) {
1397 .addReg(FramePtrReg)
1399 }
1400 }
1401
1402 Register FramePtrRegScratchCopy;
1403 Register SGPRForFPSaveRestoreCopy =
1405 if (FPSaved) {
1406
1407
1408
1409
1411 if (SGPRForFPSaveRestoreCopy) {
1412 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1413 } else {
1415 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1416 if (!FramePtrRegScratchCopy)
1418
1419 LiveUnits.addReg(FramePtrRegScratchCopy);
1420 }
1421
1423 FramePtrRegScratchCopy);
1424 }
1425
1426 if (FPSaved) {
1427
1428 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1429 : FramePtrRegScratchCopy;
1433 if (SGPRForFPSaveRestoreCopy)
1435 } else {
1436
1439 FramePtrRegScratchCopy);
1440 }
1441}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack
    // slot. If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register
        // from the debug value instructions. We should instead update it
        // with the correct register value. But it is not clear the register
        // value alone is adequate to lower the DIExpression; it should be
        // worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to
  // the default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source allocas.
  // Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot.
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
    }
  }
}
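
// Note (added for illustration; not part of the original file): the emergency
// slot registered above is what the register scavenger hands out later when
// frame-index elimination needs a temporary register and none is free, e.g.:
//
//   Register Tmp = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
//                                                MI.getIterator(),
//                                                /*RestoreAfter=*/false,
//                                                /*SPAdj=*/0);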

void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for
    // AGPR copies. Now that RA is done, check for an unused VGPR that is
    // lower than the one reserved before RA, and use it for the AGPR copy
    // instead.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR for the AGPR copy and release the
      // one reserved earlier for allocation.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }

  // We initially reserved the highest available SGPR pair for long branches;
  // now that the sizes are known, pick one that fits better.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null, there was no long branch and no
  // register was reserved, so there is nothing to replace; the same holds if
  // no lower unused SGPR pair was found.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}

// The special SGPR spills like the ones needed for FP, BP or any reserved
// registers are delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
  if (NeedExecCopyReservedReg ||
      (ReservedRegForExecCopy &&
       MRI.isPhysRegUsed(ReservedRegForExecCopy, true))) {
    MRI.reserveReg(ReservedRegForExecCopy, TRI);
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If any unused scratch SGPR was found, reserve that register itself
      // for the EXEC copy; no spill is needed in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  } else if (ReservedRegForExecCopy) {
    // Reset it at this point. There are no whole-wave copies and spills
    // encountered.
    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Any VGPR spill, or the presence of stack objects along with calls,
  // will require a frame pointer.
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain,
  // then we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // TODO: Walking through all MBBs here would be a bad heuristic. Better
      // handle them elsewhere.
      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all returns to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(),
                         [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  SmallVector<Register> SortedWWMVGPRs;
  for (Register Reg : MFI->getWWMReservedRegs()) {
    // The shift-back is needed only for the VGPRs used for SGPR spills, and
    // they are of 32-bit size. SIPreAllocateWWMRegs can add tuples into the
    // WWM reserved registers.
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    if (TRI->getRegSizeInBits(*RC) != 32)
      continue;
    SortedWWMVGPRs.push_back(Reg);
  }

  sort(SortedWWMVGPRs, std::greater<Register>());
  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);

  if (MFI->isEntryFunction())
    return;

  if (MFI->isWholeWaveFunction()) {
    // In practice, all the VGPRs are WWM registers, and we will need to save
    // at least their inactive lanes. Add them to WWMReservedRegs.
    assert(!NeedExecCopyReservedReg &&
           "Whole wave functions can use the reg mapped for their i1 argument");

    // FIXME: Be more efficient!
    unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
    for (MCRegister Reg :
         AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
      if (MF.getRegInfo().isPhysRegModified(Reg)) {
        MFI->reserveWWMRegister(Reg);
        MF.begin()->addLiveIn(Reg);
      }
    MF.begin()->sortUniqueLiveIns();
  }

  // Do not save the registers that are used to pass the return value back to
  // the caller.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Allocate spill slots for the WWM reserved VGPRs so they can be saved and
  // restored in the prolog/epilog.
  for (Register Reg : MFI->getWWMReservedRegs()) {
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                          TRI->getSpillAlign(*RC));
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A, because gfx908 had no direct AGPR
  // loads and stores, so spilling them would also require a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so
  // don't allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spills of the
  // caller-save VGPR reserved for SGPR spills, as we now always create a
  // stack entry for it if there are any calls and a stack, since that
  // requires an FP.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledVGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with the return instruction is hidden through the
  // SI_RETURN pseudo. Given that, and since IPRA computes actual register
  // usage and does not use the CSR list, the clobbering of the return
  // address by function calls (D117243) or otherwise (D120922) is not seen
  // by IPRA's register usage collection. This ensures the save/restore of
  // the return address happens in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}

static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
                                       const GCNSubtarget &ST,
                                       std::vector<CalleeSavedInfo> &CSI) {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  assert(is_sorted(CSI,
                   [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
                     return A.getReg() < B.getReg();
                   }) &&
         "Callee saved registers not sorted");

  auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
    return !CSI.isSpilledToReg() &&
           TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
           !FuncInfo->isWWMReservedRegister(CSI.getReg());
  };

  auto CSEnd = CSI.end();
  for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
    Register Reg = CSIt->getReg();
    if (!CanUseBlockOps(*CSIt))
      continue;

    // Find all the regs that will fit in a 32-bit mask starting at the
    // current reg and build said mask. It should have 1 for every register
    // that's included, with the current register as the least significant
    // bit.
    uint32_t Mask = 1;
    CSEnd = std::remove_if(
        CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
          if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
            Mask |= 1 << (CSI.getReg() - Reg);
            return true;
          } else {
            return false;
          }
        });

    const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
    Register RegBlock =
        TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
    if (!RegBlock) {
      // We couldn't find a super register for the block. This can happen if
      // the register we started with is too high (e.g. v232 if the maximum is
      // v255). We therefore try to get the last block and figure out the mask
      // from there.
      Register LastBlockStart =
          AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
      RegBlock =
          TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
      assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
             "Couldn't find super register");
      int RegDelta = Reg - LastBlockStart;
      assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
             "Bad shift amount");
      Mask <<= RegDelta;
    }

    FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);

    // The stack objects can be a bit smaller than the register block if we
    // know some of the high bits of Mask are 0. This may happen often with
    // calling conventions where the caller and callee-saved VGPRs are
    // interleaved at a small boundary (e.g. 8 or 16).
    int UnusedBits = llvm::countl_zero(Mask);
    unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
    int FrameIdx =
        MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
                              /*isSpillSlot=*/true);
    MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);

    CSIt->setFrameIdx(FrameIdx);
    CSIt->setReg(RegBlock);
  }
  CSI.erase(CSEnd, CSI.end());
}
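
// Worked example (added for illustration; not part of the original file): if
// the CSR list holds v40, v41 and v43, the scan starting at v40 builds
// Mask = 0b1011 (bit 0 = v40) and records it for the block whose sub0 is v40.
// If the first register were instead too high for a block to start there
// (e.g. v250 with v255 as the top register), the fallback picks the block at
// v224 = v0 + alignDown(250, 32) and shifts the mask left by RegDelta = 26.
// countl_zero of the final mask then trims the unused tail from the stack
// object, 4 bytes per untouched register.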

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();

  if (UseVGPRBlocks)
    assignSlotsUsingVGPRBlocks(MF, ST, CSI);

  return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
}

bool SIFrameLowering::assignCalleeSavedSpillSlotsImpl(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  Register SGPRForBPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
    return false;

  unsigned NumModifiedRegs = 0;

  if (SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg.asMCReg() &&
               SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign
  // these first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close as possible. This could save a lot of
  // space on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                               SIInstrFlags::FlatScratch))
      return false;
  } else {
    if (TII->isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}
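
// Worked example (added for illustration; not part of the original file): the
// test above asks whether the largest possible in-frame offset still encodes
// as an immediate. With MUBUF, a frame of 4096 bytes gives MaxOffset = 4095,
// which fits the 12-bit immediate offset field, so the scavenging slots can
// stay at their default location and this returns false.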

bool SIFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  const TargetRegisterClass *BlockRegClass =
      static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
  for (const CalleeSavedInfo &CS : CSI) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block store.
    uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
    int FrameIndex = CS.getFrameIdx();
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(*MF, FrameIndex);
    MachineMemOperand *MMO =
        MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 FrameInfo.getObjectSize(FrameIndex),
                                 FrameInfo.getObjectAlign(FrameIndex));

    BuildMI(MBB, MI, MI->getDebugLoc(),
            TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
        .addReg(Reg, getKillRegState(false))
        .addFrameIndex(FrameIndex)
        .addReg(FuncInfo->getStackPtrOffsetReg())
        .addImm(0)
        .addImm(Mask)
        .addMemOperand(MMO);

    FuncInfo->setHasSpilledVGPRs();

    // Add the register to the liveins. This is necessary because if any of
    // the VGPRs in the register block is reserved (e.g. if it's a WWM
    // register), then the whole block will be marked as reserved and
    // `updateLiveness` will skip it.
    MBB.addLiveIn(Reg);
  }
  MBB.sortUniqueLiveIns();

  return true;
}

bool SIFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;

  SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
  const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
  for (const CalleeSavedInfo &CS : reverse(CSI)) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block load.
    uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
    int FrameIndex = CS.getFrameIdx();
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(*MF, FrameIndex);
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
        MFI.getObjectAlign(FrameIndex));

    auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
                       TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
        .addFrameIndex(FrameIndex)
        .addReg(FuncInfo->getStackPtrOffsetReg())
        .addImm(0)
        .addImm(Mask)
        .addMemOperand(MMO);
    SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);

    // Add the register to the liveins. This is necessary because if any of
    // the VGPRs in the register block is reserved (e.g. if it's a WWM
    // register), then the whole block will be marked as reserved and
    // `updateLiveness` will skip it.
    MBB.addLiveIn(Reg);
  }

  MBB.sortUniqueLiveIns();
  return true;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}
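
// Worked example (added for illustration; not part of the original file):
// lowering an ADJCALLSTACKUP of 64 bytes on a wave64 MUBUF target emits
// s_add_i32 s32, s32, 64 * 64, i.e. the per-lane amount scaled by
// getScratchScaleFactor; the matching ADJCALLSTACKDOWN negates the amount.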

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known 0, so we never really need to setup an
// explicit register for it. However, we have to provide a compatible register
// to the debugger, so it is reserved anyway when needed.
bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry & chain functions we can use an immediate offset in most cases,
  // so the presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before
    // the frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         mayReserveScratchForCWSR(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

bool SIFrameLowering::mayReserveScratchForCWSR(
    const MachineFunction &MF) const {
  // Scratch may need to be reserved for the CWSR trap handler when dynamic
  // VGPRs are in use on a compute queue.
  return MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize() != 0 &&
         AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) &&
         AMDGPU::isCompute(MF.getFunction().getCallingConv());
}

bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
          MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
         "only expected to call this for entry points and chain functions");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any calls.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}