LLVM: lib/Target/AArch64/AArch64FrameLowering.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
259#include
260#include
261#include
262#include
263#include
264
265using namespace llvm;
266
267#define DEBUG_TYPE "frame-info"
268
270 cl::desc("enable use of redzone on AArch64"),
272
274 "stack-tagging-merge-settag",
275 cl::desc("merge settag instruction in function epilog"), cl::init(true),
277
279 cl::desc("sort stack allocations"),
281
284 cl::desc("Split allocation of ZPR & PPR objects"),
286
288 "homogeneous-prolog-epilog", cl::Hidden,
289 cl::desc("Emit homogeneous prologue and epilogue for the size "
290 "optimization (default = off)"));
291
292
296
300
302 "aarch64-disable-multivector-spill-fill",
303 cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
305
306int64_t
307AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
311 bool IsTailCallReturn = (MBB.end() != MBBI)
314
315 int64_t ArgumentPopSize = 0;
316 if (IsTailCallReturn) {
318
319
320
321
322 ArgumentPopSize = StackAdjust.getImm();
323 } else {
324
325
326
327
329 }
330
331 return ArgumentPopSize;
332}
333
336
338
339
340
343
347
353
356
357
358
361 : 0,
363}
364
365
366
367
372 return true;
373
376
380 return true;
381 }
382
383 return false;
384}
385
389
395
396
397
398
399bool AArch64FrameLowering::homogeneousPrologEpilog(
402 return false;
404 return false;
406 return false;
407
408
410 return false;
411
412
414 return false;
415
416
420 return false;
422 return false;
423
426 return false;
427
428
429
430
432 unsigned NumGPRs = 0;
433 for (unsigned I = 0; CSRegs[I]; ++I) {
435 if (Reg == AArch64::LR) {
436 assert(CSRegs[I + 1] == AArch64::FP);
437 if (NumGPRs % 2 != 0)
438 return false;
439 break;
440 }
441 if (AArch64::GPR64RegClass.contains(Reg))
442 ++NumGPRs;
443 }
444
445 return true;
446}
447
448
449bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
451}
452
453
454
455
456
458
459
460
461
463
464
465
468 if (MI.isDebugInstr() || MI.isPseudo() ||
469 MI.getOpcode() == AArch64::ADDXri ||
470 MI.getOpcode() == AArch64::ADDSXri)
471 continue;
472
474 if (!MO.isFI())
475 continue;
476
480 return 0;
481 }
482 }
483 }
485}
486
491
492unsigned
493AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
495 bool IsWin64, bool IsFunclet) const {
497 "Tail call reserved stack must be aligned to 16 bytes");
498 if (!IsWin64 || IsFunclet) {
500 } else {
503 Attribute::SwiftAsync))
504 report_fatal_error("cannot generate ABI-changing tail call for Win64");
506
507
509
511
517 int FrameIndex = H.CatchObj.FrameIndex;
518 if ((FrameIndex != INT_MAX) &&
519 CatchObjFrameIndices.insert(FrameIndex)) {
520 FixedObjectSize = alignTo(FixedObjectSize,
523 }
524 }
525 }
526
527 FixedObjectSize += 8;
528 }
529 return alignTo(FixedObjectSize, 16);
530 }
531}
532
535 return false;
536
537
538
540 const unsigned RedZoneSize =
542 if (!RedZoneSize)
543 return false;
544
548
549
550
551
552
553 bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
555 !Subtarget.hasSVE();
556
557 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
559}
560
561
562
567
568
569
570
572 return true;
573
575 return true;
578 RegInfo->hasStackRealignment(MF))
579 return true;
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
602 return true;
603
604
605
606
607
608
609
610
613 return true;
614
615 return false;
616}
617
618
622
623
624
625 if (TT.isOSDarwin() || TT.isOSWindows())
626 return true;
627
628
630 return true;
631
632
634 return true;
635
636 return false;
637}
638
639
640
641
642
643
653
657
663 unsigned Opc = I->getOpcode();
664 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
665 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
666
668 int64_t Amount = I->getOperand(0).getImm();
670 if (!IsDestroy)
671 Amount = -Amount;
672
673
674
675
676 if (CalleePopAmount == 0) {
677
678
679
680
681
682
683
684
685
686
687 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
688
691
692
693
694
695
696
698 "non-reserved call frame without var sized objects?");
701 inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
702 } else {
705 }
706 }
707 } else if (CalleePopAmount != 0) {
708
709
710 assert(CalleePopAmount < 0xffffff && "call frame too large");
713 }
715}
716
719
722 const auto &TRI = *Subtarget.getRegisterInfo();
724
726
727
728 CFIBuilder.buildDefCFA(AArch64::SP, 0);
729
730
731 if (MFI.shouldSignReturnAddress(MF))
732 MFI.branchProtectionPAuthLR() ? CFIBuilder.buildNegateRAStateWithPC()
733 : CFIBuilder.buildNegateRAState();
734
735
736 if (MFI.needsShadowCallStackPrologueEpilogue(MF))
737 CFIBuilder.buildSameValue(AArch64::X18);
738
739
740 const std::vector &CSI =
742 for (const auto &Info : CSI) {
744 if (.regNeedsCFI(Reg, Reg))
745 continue;
746 CFIBuilder.buildSameValue(Reg);
747 }
748}
749
751 switch (Reg.id()) {
752 default:
753
754
755 return 0;
756
757
758#define CASE(n) \
759 case AArch64::W##n: \
760 case AArch64::X##n: \
761 return AArch64::X##n
781#undef CASE
782
783
784#define CASE(n) \
785 case AArch64::B##n: \
786 case AArch64::H##n: \
787 case AArch64::S##n: \
788 case AArch64::D##n: \
789 case AArch64::Q##n: \
790 return HasSVE ? AArch64::Z##n : AArch64::Q##n
823#undef CASE
824 }
825}
826
827void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
829
831
832
836
837 const MachineFunction &MF = *MBB.getParent();
838 const AArch64Subtarget &STI = MF.getSubtarget();
840
841 BitVector GPRsToZero(TRI.getNumRegs());
842 BitVector FPRsToZero(TRI.getNumRegs());
844 for (MCRegister Reg : RegsToZero.set_bits()) {
845 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
846
848 GPRsToZero.set(XReg);
850
852 FPRsToZero.set(XReg);
853 }
854 }
855
857
858
859 for (MCRegister Reg : GPRsToZero.set_bits())
861
862
863 for (MCRegister Reg : FPRsToZero.set_bits())
865
866 if (HasSVE) {
867 for (MCRegister PReg :
868 {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
869 AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
870 AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
871 AArch64::P15}) {
872 if (RegsToZero[PReg])
874 }
875 }
876}
877
878bool AArch64FrameLowering::windowsRequiresStackProbe(
879 const MachineFunction &MF, uint64_t StackSizeInBytes) const {
880 const AArch64Subtarget &Subtarget = MF.getSubtarget();
881 const AArch64FunctionInfo &MFI = *MF.getInfo();
882
883
884 return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
885 StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
886}
887
892
894 for (unsigned i = 0; CSRegs[i]; ++i)
896}
897
899AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
900 bool HasCall) const {
902
903
904
905
908 return AArch64::X9;
909
910 const AArch64Subtarget &Subtarget = MF->getSubtarget();
912 LivePhysRegs LiveRegs(TRI);
914 if (HasCall) {
915 LiveRegs.addReg(AArch64::X16);
916 LiveRegs.addReg(AArch64::X17);
917 LiveRegs.addReg(AArch64::X18);
918 }
919
920
921 const MachineRegisterInfo &MRI = MF->getRegInfo();
922 if (LiveRegs.available(MRI, AArch64::X9))
923 return AArch64::X9;
924
925 for (unsigned Reg : AArch64::GPR64RegClass) {
926 if (LiveRegs.available(MRI, Reg))
927 return Reg;
928 }
929 return AArch64::NoRegister;
930}
931
940
946
947
948 if (.available(MRI, AArch64::X16) ||
950 return false;
951 }
952
953
954
956 MBB.isLiveIn(AArch64::NZCV))
957 return false;
958
960 if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister)
961 return false;
962
963
964
966 windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
967 if (findScratchNonCalleeSaveRegister(TmpMBB, true) == AArch64::NoRegister)
968 return false;
969
970 return true;
971}
972
976 F.needsUnwindTableEntry();
977}
978
979bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
981
982
984 return false;
987}
988
989
990
995 unsigned Opc = MBBI->getOpcode();
999 unsigned ImmIdx = MBBI->getNumOperands() - 1;
1000 int Imm = MBBI->getOperand(ImmIdx).getImm();
1004
1005 switch (Opc) {
1006 default:
1008 case AArch64::STR_ZXI:
1009 case AArch64::LDR_ZXI: {
1010 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1011 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveZReg))
1015 break;
1016 }
1017 case AArch64::STR_PXI:
1018 case AArch64::LDR_PXI: {
1019 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1020 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SavePReg))
1024 break;
1025 }
1026 case AArch64::LDPDpost:
1028 [[fallthrough]];
1029 case AArch64::STPDpre: {
1030 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1031 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
1032 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
1037 break;
1038 }
1039 case AArch64::LDPXpost:
1041 [[fallthrough]];
1042 case AArch64::STPXpre: {
1043 Register Reg0 = MBBI->getOperand(1).getReg();
1044 Register Reg1 = MBBI->getOperand(2).getReg();
1045 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1046 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
1049 else
1050 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
1051 .addImm(RegInfo->getSEHRegNum(Reg0))
1052 .addImm(RegInfo->getSEHRegNum(Reg1))
1055 break;
1056 }
1057 case AArch64::LDRDpost:
1059 [[fallthrough]];
1060 case AArch64::STRDpre: {
1061 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1062 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
1066 break;
1067 }
1068 case AArch64::LDRXpost:
1070 [[fallthrough]];
1071 case AArch64::STRXpre: {
1072 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1073 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
1077 break;
1078 }
1079 case AArch64::STPDi:
1080 case AArch64::LDPDi: {
1081 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1082 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1083 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
1088 break;
1089 }
1090 case AArch64::STPXi:
1091 case AArch64::LDPXi: {
1092 Register Reg0 = MBBI->getOperand(0).getReg();
1093 Register Reg1 = MBBI->getOperand(1).getReg();
1094
1095 int SEHReg0 = RegInfo->getSEHRegNum(Reg0);
1096 int SEHReg1 = RegInfo->getSEHRegNum(Reg1);
1097
1098 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1099 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
1102 else if (SEHReg0 >= 19 && SEHReg1 >= 19)
1103 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
1108 else
1109 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegIP))
1114 break;
1115 }
1116 case AArch64::STRXui:
1117 case AArch64::LDRXui: {
1118 int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1119 if (Reg >= 19)
1120 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
1124 else
1125 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegI))
1129 break;
1130 }
1131 case AArch64::STRDui:
1132 case AArch64::LDRDui: {
1133 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1134 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
1138 break;
1139 }
1140 case AArch64::STPQi:
1141 case AArch64::LDPQi: {
1142 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1143 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1144 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
1149 break;
1150 }
1151 case AArch64::LDPQpost:
1153 [[fallthrough]];
1154 case AArch64::STPQpre: {
1155 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1156 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
1157 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
1162 break;
1163 }
1164 }
1166 return I;
1167}
1168
1172 return false;
1173
1174
1176 if (ST.isTargetDarwin())
1177 return ST.hasSVE();
1178 return true;
1179}
1180
1185
1189
1192 };
1193
1198 DL = MBBI->getDebugLoc();
1199
1202 };
1203
1204
1205 EmitSignRA(MF.front());
1207 if (MBB.isEHFuncletEntry())
1208 EmitSignRA(MBB);
1209 if (MBB.isReturnBlock())
1210 EmitAuthRA(MBB);
1211 }
1212}
1213
1219
1225
1230
1235
1236
1237
1238
1239
1244 MF, FI, FrameReg,
1245
1248 false);
1249}
1250
1253 int FI) const {
1254
1255
1256
1257
1258
1259
1261
1265 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1266
1267
1268
1269
1270 if (MFI.isVariableSizedObjectIndex(FI)) {
1272 }
1273
1274
1275 if (!SVEStackSize)
1277
1280 if (MFI.hasScalableStackID(FI)) {
1281 if (FPAfterSVECalleeSaves &&
1284 "split-sve-objects not supported with FPAfterSVECalleeSaves");
1286 }
1288
1289
1292 AccessOffset = -PPRStackSize;
1293 return AccessOffset +
1295 ObjectOffset);
1296 }
1297
1298 bool IsFixed = MFI.isFixedObjectIndex(FI);
1299 bool IsCSR =
1301
1303 if (!IsFixed && !IsCSR) {
1304 ScalableOffset = -SVEStackSize;
1305 } else if (FPAfterSVECalleeSaves && IsCSR) {
1306 ScalableOffset =
1308 }
1309
1311}
1312
1315 int FI) const {
1317}
1318
1320 int64_t ObjectOffset) const {
1324 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
1325 unsigned FixedObject =
1326 getFixedObjectSize(MF, AFI, IsWin64, false);
1328 int64_t FPAdjust =
1331}
1332
1334 int64_t ObjectOffset) const {
1337}
1338
1339
1341 int FI) const {
1345 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1346 ? getFPOffset(MF, ObjectOffset).getFixed()
1347 : getStackOffset(MF, ObjectOffset).getFixed();
1348}
1349
1352 bool ForSimm) const {
1354 int64_t ObjectOffset = MFI.getObjectOffset(FI);
1355 bool isFixed = MFI.isFixedObjectIndex(FI);
1358 FrameReg, PreferFP, ForSimm);
1359}
1360
1362 const MachineFunction &MF, int64_t ObjectOffset, bool isFixed,
1364 bool ForSimm) const {
1369
1370 int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
1371 int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
1372 bool isCSR =
1374 bool isSVE = MFI.isScalableStackID(StackID);
1375
1378 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1379
1380
1381
1382
1383
1384 bool UseFP = false;
1386
1387
1388
1389 PreferFP &= !SVEStackSize;
1390
1391
1392
1393
1394
1395 if (isFixed) {
1396 UseFP = hasFP(MF);
1397 } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
1398
1399
1400
1401 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1402 UseFP = true;
1403 } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
1404
1405
1406
1407
1408 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1409 PreferFP |= Offset > -FPOffset && !SVEStackSize;
1410
1411 if (FPOffset >= 0) {
1412
1413
1414 UseFP = true;
1415 } else if (MFI.hasVarSizedObjects()) {
1416
1417
1418
1419 bool CanUseBP = RegInfo->hasBasePointer(MF);
1420 if (FPOffsetFits && CanUseBP)
1421 UseFP = PreferFP;
1422 else if (!CanUseBP)
1423 UseFP = true;
1424
1425
1426
1427 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1428
1429
1430
1431 (void) Subtarget;
1434 "Funclets should only be present on Win64");
1435 UseFP = true;
1436 } else {
1437
1438 if (FPOffsetFits && PreferFP)
1439 UseFP = true;
1440 }
1441 }
1442 }
1443
1445 ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
1446 "In the presence of dynamic stack pointer realignment, "
1447 "non-argument/CSR objects cannot be accessed through the frame pointer");
1448
1450
1451 if (isSVE) {
1455 SVEStackSize +
1457 ObjectOffset);
1458
1459
1460
1462
1463
1464 FPOffset -= PPRStackSize;
1465
1466 SPOffset -= PPRStackSize;
1467
1468
1469 }
1470
1471 if (FPAfterSVECalleeSaves) {
1476 }
1477 }
1478
1479
1482 RegInfo->hasStackRealignment(MF))) {
1483 FrameReg = RegInfo->getFrameRegister(MF);
1484 return FPOffset;
1485 }
1486 FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
1488
1489 return SPOffset;
1490 }
1491
1493 if (FPAfterSVECalleeSaves) {
1494
1495
1498 if (UseFP) {
1499 if (isFixed)
1500 SVEAreaOffset = SVECalleeSavedStack;
1501 else if (!isCSR)
1502 SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
1503 } else {
1504 if (isFixed)
1505 SVEAreaOffset = SVEStackSize;
1506 else if (isCSR)
1507 SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
1508 }
1509 } else {
1510 if (UseFP && !(isFixed || isCSR))
1511 SVEAreaOffset = -SVEStackSize;
1512 if (!UseFP && (isFixed || isCSR))
1513 SVEAreaOffset = SVEStackSize;
1514 }
1515
1516 if (UseFP) {
1517 FrameReg = RegInfo->getFrameRegister(MF);
1519 }
1520
1521
1522 if (RegInfo->hasBasePointer(MF))
1523 FrameReg = RegInfo->getBaseRegister();
1524 else {
1525 assert(!MFI.hasVarSizedObjects() &&
1526 "Can't use SP when we have var sized objects.");
1527 FrameReg = AArch64::SP;
1528
1529
1530
1533 }
1534
1536}
1537
1539
1540
1541
1542
1543
1546}
1547
1555 Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
1558}
1559
1561 unsigned SpillCount, unsigned Reg1,
1562 unsigned Reg2, bool NeedsWinCFI,
1563 bool IsFirst,
1565
1566
1567
1568
1569
1570
1571
1572 if (Reg2 == AArch64::FP)
1573 return true;
1574 if (!NeedsWinCFI)
1575 return false;
1576
1577
1578
1579
1580
1581
1582 if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
1583 return SpillExtendedVolatile
1584 ? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
1585 (SpillCount % 2) == 0)
1586 : false;
1587
1588
1589
1590
1591
1592
1593 if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
1594 (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
1595 return false;
1596 return true;
1597}
1598
1599
1600
1601
1602
1604 unsigned SpillCount, unsigned Reg1,
1605 unsigned Reg2, bool UsesWinAAPCS,
1606 bool NeedsWinCFI, bool NeedsFrameRecord,
1607 bool IsFirst,
1609 if (UsesWinAAPCS)
1611 Reg1, Reg2, NeedsWinCFI, IsFirst,
1613
1614
1615
1616 if (NeedsFrameRecord)
1617 return Reg2 == AArch64::LR;
1618
1619 return false;
1620}
1621
1622namespace {
1623
1624struct RegPairInfo {
1627 int FrameIdx;
1629 enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
1630 const TargetRegisterClass *RC;
1631
1632 RegPairInfo() = default;
1633
1634 bool isPaired() const { return Reg2.isValid(); }
1635
1636 bool isScalable() const { return Type == PPR || Type == ZPR; }
1637};
1638
1639}
1640
1642 for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
1643 if (SavedRegs.test(PReg)) {
1644 unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
1646 }
1647 }
1649}
1650
1651
1655 return false;
1656
1658 bool IsLocallyStreaming =
1660
1661
1662
1663
1664 return Subtarget.hasSVE2p1() ||
1665 (Subtarget.hasSME2() &&
1666 (!IsLocallyStreaming && Subtarget.isStreaming()));
1667}
1668
1674 bool NeedsFrameRecord) {
1675
1676 if (CSI.empty())
1677 return;
1678
1685 (void)CC;
1686
1687
1691 (Count & 1) == 0) &&
1692 "Odd number of callee-saved regs to spill!");
1694 int StackFillDir = -1;
1695 int RegInc = 1;
1696 unsigned FirstReg = 0;
1697 if (IsWindows) {
1698
1699 ByteOffset = 0;
1700 StackFillDir = 1;
1701
1702
1703 RegInc = -1;
1704 FirstReg = Count - 1;
1705 }
1706
1708
1709
1710
1711
1712
1713
1714
1715
1716 bool SpillExtendedVolatile =
1718 const auto &Reg = CSI.getReg();
1719 return Reg >= AArch64::X0 && Reg <= AArch64::X18;
1720 });
1721
1722 int ZPRByteOffset = 0;
1723 int PPRByteOffset = 0;
1725 if (SplitPPRs) {
1728 } else if (!FPAfterSVECalleeSaves) {
1729 ZPRByteOffset =
1731
1732 PPRByteOffset = 0;
1733 }
1734
1738
1739
1740 for (unsigned i = FirstReg; i < Count; i += RegInc) {
1741 RegPairInfo RPI;
1742 RPI.Reg1 = CSI[i].getReg();
1743
1744 if (AArch64::GPR64RegClass.contains(RPI.Reg1)) {
1745 RPI.Type = RegPairInfo::GPR;
1746 RPI.RC = &AArch64::GPR64RegClass;
1747 } else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) {
1748 RPI.Type = RegPairInfo::FPR64;
1749 RPI.RC = &AArch64::FPR64RegClass;
1750 } else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) {
1751 RPI.Type = RegPairInfo::FPR128;
1752 RPI.RC = &AArch64::FPR128RegClass;
1753 } else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) {
1754 RPI.Type = RegPairInfo::ZPR;
1755 RPI.RC = &AArch64::ZPRRegClass;
1756 } else if (AArch64::PPRRegClass.contains(RPI.Reg1)) {
1757 RPI.Type = RegPairInfo::PPR;
1758 RPI.RC = &AArch64::PPRRegClass;
1759 } else if (RPI.Reg1 == AArch64::VG) {
1760 RPI.Type = RegPairInfo::VG;
1761 RPI.RC = &AArch64::FIXED_REGSRegClass;
1762 } else {
1764 }
1765
1766 int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
1767 ? PPRByteOffset
1768 : ZPRByteOffset;
1769
1770
1771 if (HasCSHazardPadding &&
1774 ByteOffset += StackFillDir * StackHazardSize;
1775 LastReg = RPI.Reg1;
1776
1778 int Scale = TRI->getSpillSize(*RPI.RC);
1779
1780 if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
1781 MCRegister NextReg = CSI[i + RegInc].getReg();
1782 bool IsFirst = i == FirstReg;
1783 unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
1784 switch (RPI.Type) {
1785 case RegPairInfo::GPR:
1786 if (AArch64::GPR64RegClass.contains(NextReg) &&
1788 SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
1789 NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
1790 RPI.Reg2 = NextReg;
1791 break;
1792 case RegPairInfo::FPR64:
1793 if (AArch64::FPR64RegClass.contains(NextReg) &&
1795 SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
1796 NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
1797 RPI.Reg2 = NextReg;
1798 break;
1799 case RegPairInfo::FPR128:
1800 if (AArch64::FPR128RegClass.contains(NextReg))
1801 RPI.Reg2 = NextReg;
1802 break;
1803 case RegPairInfo::PPR:
1804 break;
1805 case RegPairInfo::ZPR:
1807 ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
1808
1809
1810 int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
1812 RPI.Reg2 = NextReg;
1813 }
1814 break;
1815 case RegPairInfo::VG:
1816 break;
1817 }
1818 }
1819
1820
1821
1822
1823
1824
1825
1826 assert((!RPI.isPaired() ||
1827 (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
1828 "Out of order callee saved regs!");
1829
1830 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
1831 RPI.Reg1 == AArch64::LR) &&
1832 "FrameRecord must be allocated together with LR");
1833
1834
1835 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
1836 RPI.Reg2 == AArch64::LR) &&
1837 "FrameRecord must be allocated together with LR");
1838
1839
1840
1844 (RPI.isPaired() &&
1845 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1846 RPI.Reg1 + 1 == RPI.Reg2))) &&
1847 "Callee-save registers not saved as adjacent register pair!");
1848
1849 RPI.FrameIdx = CSI[i].getFrameIdx();
1850 if (IsWindows &&
1851 RPI.isPaired())
1852 RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
1853
1854
1855
1856 if (RPI.isScalable() && ScalableByteOffset % Scale != 0) {
1857 ScalableByteOffset = alignTo(ScalableByteOffset, Scale);
1858 }
1859
1860 int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1861 assert(OffsetPre % Scale == 0);
1862
1863 if (RPI.isScalable())
1864 ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1865 else
1866 ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1867
1868
1869
1871 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1872 (IsWindows && RPI.Reg2 == AArch64::LR)))
1873 ByteOffset += StackFillDir * 8;
1874
1875
1876
1877 if (NeedGapToAlignStack && !IsWindows && !RPI.isScalable() &&
1878 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
1879 ByteOffset % 16 != 0) {
1880 ByteOffset += 8 * StackFillDir;
1882
1883
1884
1886 NeedGapToAlignStack = false;
1887 }
1888
1889 int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1890 assert(OffsetPost % Scale == 0);
1891
1892
1893 int Offset = IsWindows ? OffsetPre : OffsetPost;
1894
1895
1896
1898 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1899 (IsWindows && RPI.Reg2 == AArch64::LR)))
1901 RPI.Offset = Offset / Scale;
1902
1903 assert((!RPI.isPaired() ||
1904 (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
1905 (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
1906 "Offset out of bounds for LDP/STP immediate");
1907
1908 auto isFrameRecord = [&] {
1909 if (RPI.isPaired())
1910 return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
1911 : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
1912
1913
1914
1915
1916
1917
1918
1919 return i > 0 && RPI.Reg1 == AArch64::FP &&
1920 CSI[i - 1].getReg() == AArch64::LR;
1921 };
1922
1923
1924
1925 if (NeedsFrameRecord && isFrameRecord())
1927
1929 if (RPI.isPaired())
1930 i += RegInc;
1931 }
1932 if (IsWindows) {
1933
1934
1935
1936
1937
1940
1941
1942 std::reverse(RegPairs.begin(), RegPairs.end());
1943 }
1944}
1945
1955
1957
1959
1960
1961 MRI.freezeReservedRegs();
1962
1963 if (homogeneousPrologEpilog(MF)) {
1966
1967 for (auto &RPI : RegPairs) {
1968 MIB.addReg(RPI.Reg1);
1969 MIB.addReg(RPI.Reg2);
1970
1971
1972 if (.isReserved(RPI.Reg1))
1973 MBB.addLiveIn(RPI.Reg1);
1974 if (RPI.isPaired() && .isReserved(RPI.Reg2))
1975 MBB.addLiveIn(RPI.Reg2);
1976 }
1977 return true;
1978 }
1979 bool PTrueCreated = false;
1980 for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
1983 unsigned StrOpc;
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995 unsigned Size = TRI->getSpillSize(*RPI.RC);
1996 Align Alignment = TRI->getSpillAlign(*RPI.RC);
1997 switch (RPI.Type) {
1998 case RegPairInfo::GPR:
1999 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
2000 break;
2001 case RegPairInfo::FPR64:
2002 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
2003 break;
2004 case RegPairInfo::FPR128:
2005 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
2006 break;
2007 case RegPairInfo::ZPR:
2008 StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
2009 break;
2010 case RegPairInfo::PPR:
2011 StrOpc = AArch64::STR_PXI;
2012 break;
2013 case RegPairInfo::VG:
2014 StrOpc = AArch64::STRXui;
2015 break;
2016 }
2017
2020 if (X0Scratch != AArch64::NoRegister)
2024 });
2025
2026 if (Reg1 == AArch64::VG) {
2027
2028 Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);
2029 assert(Reg1 != AArch64::NoRegister);
2035 } else {
2039 return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
2040 AArch64::X0, LiveIn.PhysReg);
2041 })) {
2042 X0Scratch = Reg1;
2044 .addReg(AArch64::X0)
2046 }
2047
2048 RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
2050 TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));
2056 Reg1 = AArch64::X0;
2057 }
2058 }
2059
2062 if (RPI.isPaired())
2064 dbgs() << ") -> fi#(" << RPI.FrameIdx;
2065 if (RPI.isPaired())
2066 dbgs() << ", " << RPI.FrameIdx + 1;
2067 dbgs() << ")\n";
2068 });
2069
2071 !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
2072 "Windows unwdinding requires a consecutive (FP,LR) pair");
2073
2074
2075
2076 unsigned FrameIdxReg1 = RPI.FrameIdx;
2077 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2080 std::swap(FrameIdxReg1, FrameIdxReg2);
2081 }
2082
2083 if (RPI.isPaired() && RPI.isScalable()) {
2089 "Expects SVE2.1 or SME2 target and a predicate register");
2090#ifdef EXPENSIVE_CHECKS
2091 auto IsPPR = [](const RegPairInfo &c) {
2092 return c.Reg1 == RegPairInfo::PPR;
2093 };
2094 auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
2095 auto IsZPR = [](const RegPairInfo &c) {
2096 return c.Type == RegPairInfo::ZPR;
2097 };
2098 auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
2099 assert(!(PPRBegin < ZPRBegin) &&
2100 "Expected callee save predicate to be handled first");
2101#endif
2102 if (!PTrueCreated) {
2103 PTrueCreated = true;
2106 }
2108 if (.isReserved(Reg1))
2109 MBB.addLiveIn(Reg1);
2110 if (.isReserved(Reg2))
2111 MBB.addLiveIn(Reg2);
2112 MIB.addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
2117 MIB.addReg(AArch64::SP)
2118 .addImm(RPI.Offset / 2)
2119
2124 if (NeedsWinCFI)
2126 } else {
2128 if (.isReserved(Reg1))
2129 MBB.addLiveIn(Reg1);
2130 if (RPI.isPaired()) {
2131 if (.isReserved(Reg2))
2132 MBB.addLiveIn(Reg2);
2137 }
2139 .addReg(AArch64::SP)
2140 .addImm(RPI.Offset)
2141
2146 if (NeedsWinCFI)
2148 }
2149
2151 if (RPI.Type == RegPairInfo::ZPR) {
2153 if (RPI.isPaired())
2155 } else if (RPI.Type == RegPairInfo::PPR) {
2157 if (RPI.isPaired())
2159 }
2160 }
2161 return true;
2162}
2163
2172
2174 DL = MBBI->getDebugLoc();
2175
2177 if (homogeneousPrologEpilog(MF, &MBB)) {
2180 for (auto &RPI : RegPairs) {
2183 }
2184 return true;
2185 }
2186
2187
2188 auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
2190 auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
2191 std::reverse(PPRBegin, PPREnd);
2192 auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
2194 auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
2195 std::reverse(ZPRBegin, ZPREnd);
2196
2197 bool PTrueCreated = false;
2198 for (const RegPairInfo &RPI : RegPairs) {
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210 unsigned LdrOpc;
2211 unsigned Size = TRI->getSpillSize(*RPI.RC);
2212 Align Alignment = TRI->getSpillAlign(*RPI.RC);
2213 switch (RPI.Type) {
2214 case RegPairInfo::GPR:
2215 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2216 break;
2217 case RegPairInfo::FPR64:
2218 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2219 break;
2220 case RegPairInfo::FPR128:
2221 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2222 break;
2223 case RegPairInfo::ZPR:
2224 LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
2225 break;
2226 case RegPairInfo::PPR:
2227 LdrOpc = AArch64::LDR_PXI;
2228 break;
2229 case RegPairInfo::VG:
2230 continue;
2231 }
2234 if (RPI.isPaired())
2236 dbgs() << ") -> fi#(" << RPI.FrameIdx;
2237 if (RPI.isPaired())
2238 dbgs() << ", " << RPI.FrameIdx + 1;
2239 dbgs() << ")\n";
2240 });
2241
2242
2243
2244
2245 unsigned FrameIdxReg1 = RPI.FrameIdx;
2246 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2249 std::swap(FrameIdxReg1, FrameIdxReg2);
2250 }
2251
2253 if (RPI.isPaired() && RPI.isScalable()) {
2258 "Expects SVE2.1 or SME2 target and a predicate register");
2259#ifdef EXPENSIVE_CHECKS
2260 assert(!(PPRBegin < ZPRBegin) &&
2261 "Expected callee save predicate to be handled first");
2262#endif
2263 if (!PTrueCreated) {
2264 PTrueCreated = true;
2267 }
2269 MIB.addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
2275 MIB.addReg(AArch64::SP)
2276 .addImm(RPI.Offset / 2)
2277
2282 if (NeedsWinCFI)
2284 } else {
2286 if (RPI.isPaired()) {
2291 }
2293 MIB.addReg(AArch64::SP)
2294 .addImm(RPI.Offset)
2295
2300 if (NeedsWinCFI)
2302 }
2303 }
2304 return true;
2305}
2306
2307
2310 auto *PSV =
2312 if (PSV)
2313 return std::optional(PSV->getFrameIndex());
2314
2318 FI++)
2320 return FI;
2321 }
2322 }
2323
2324 return std::nullopt;
2325}
2326
2327
2330 if (.mayLoadOrStore() || MI.getNumMemOperands() < 1)
2331 return std::nullopt;
2332
2334}
2335
2336
2338 return AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());
2339}
2340
2341
2342
2343
2344void AArch64FrameLowering::determineStackHazardSlot(
2347 auto *AFI = MF.getInfo();
2348 if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
2350 return;
2351
2352
2355 return;
2356
2358
2359
2360
2361 bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
2362 return AArch64::FPR64RegClass.contains(Reg) ||
2363 AArch64::FPR128RegClass.contains(Reg) ||
2364 AArch64::ZPRRegClass.contains(Reg);
2365 });
2366 bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
2367 return AArch64::PPRRegClass.contains(Reg);
2368 });
2369 bool HasFPRStackObjects = false;
2370 bool HasPPRStackObjects = false;
2372 enum SlotType : uint8_t {
2374 ZPRorFPR = 1 << 0,
2375 PPR = 1 << 1,
2376 GPR = 1 << 2,
2378 };
2379
2380
2381
2383 for (auto &MBB : MF) {
2386 if (!FI || FI < 0 || FI > int(SlotTypes.size()))
2387 continue;
2389 SlotTypes[*FI] |=
2390 isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;
2391 } else {
2393 ? SlotType::ZPRorFPR
2394 : SlotType::GPR;
2395 }
2396 }
2397 }
2398
2399 for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {
2400 HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
2401
2402
2403 if (SlotTypes[FI] == SlotType::PPR) {
2405 HasPPRStackObjects = true;
2406 }
2407 }
2408 }
2409
2410 if (HasFPRCSRs || HasFPRStackObjects) {
2412 LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
2413 << StackHazardSize << "\n");
2415 }
2416
2418 return;
2419
2424 LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n");
2425 return;
2426 }
2427
2428
2429
2430 LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in "
2431 "non-SVE CC function...\n");
2432
2433
2434
2438 << "Calling convention is not supported with SplitSVEObjects\n");
2439 return;
2440 }
2441
2442 if (!HasPPRCSRs && !HasPPRStackObjects) {
2444 dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
2445 return;
2446 }
2447
2448 if (!HasFPRCSRs && !HasFPRStackObjects) {
2451 << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
2452 return;
2453 }
2454
2455 [[maybe_unused]] const AArch64Subtarget &Subtarget =
2456 MF.getSubtarget();
2458 "Expected SVE to be available for PPRs");
2459
2460 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2461
2462
2463
2464 BitVector FPRZRegs(SavedRegs.size());
2465 for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {
2466 BitVector::reference RegBit = SavedRegs[Reg];
2467 if (!RegBit)
2468 continue;
2469 unsigned SubRegIdx = 0;
2470 if (AArch64::FPR64RegClass.contains(Reg))
2471 SubRegIdx = AArch64::dsub;
2472 else if (AArch64::FPR128RegClass.contains(Reg))
2473 SubRegIdx = AArch64::zsub;
2474 else
2475 continue;
2476
2477 RegBit = false;
2478
2480 TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);
2481 FPRZRegs.set(ZReg);
2482 }
2483 SavedRegs |= FPRZRegs;
2484
2487 }
2488}
2489
2493
2494
2496 return;
2497
2499
2503 unsigned UnspilledCSGPR = AArch64::NoRegister;
2504 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2505
2508
2510 RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : MCRegister();
2511
2512 unsigned ExtraCSSpill = 0;
2513 bool HasUnpairedGPR64 = false;
2514 bool HasPairZReg = false;
2515 BitVector UserReservedRegs = RegInfo->getUserReservedRegs(MF);
2516 BitVector ReservedRegs = RegInfo->getReservedRegs(MF);
2517
2518
2519 for (unsigned i = 0; CSRegs[i]; ++i) {
2521
2522
2523 if (Reg == BasePointerReg)
2524 SavedRegs.set(Reg);
2525
2526
2527
2528 if (UserReservedRegs[Reg]) {
2529 SavedRegs.reset(Reg);
2530 continue;
2531 }
2532
2533 bool RegUsed = SavedRegs.test(Reg);
2535 const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
2536 if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
2537 AArch64::FPR128RegClass.contains(Reg)) {
2538
2539
2540 if (HasUnpairedGPR64)
2541 PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
2542 else
2543 PairedReg = CSRegs[i ^ 1];
2544 }
2545
2546
2547
2548
2549
2550 if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
2551 PairedReg = AArch64::NoRegister;
2552 HasUnpairedGPR64 = true;
2553 }
2554 assert(PairedReg == AArch64::NoRegister ||
2555 AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
2556 AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
2557 AArch64::FPR128RegClass.contains(Reg, PairedReg));
2558
2559 if (!RegUsed) {
2560 if (AArch64::GPR64RegClass.contains(Reg) && !ReservedRegs[Reg]) {
2561 UnspilledCSGPR = Reg;
2562 UnspilledCSGPRPaired = PairedReg;
2563 }
2564 continue;
2565 }
2566
2567
2568
2569
2570 if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
2571 !SavedRegs.test(PairedReg)) {
2572 SavedRegs.set(PairedReg);
2573 if (AArch64::GPR64RegClass.contains(PairedReg) &&
2574 !ReservedRegs[PairedReg])
2575 ExtraCSSpill = PairedReg;
2576 }
2577
2578 HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
2579 SavedRegs.test(CSRegs[i ^ 1]));
2580 }
2581
2584
2585
2587 if (PnReg.isValid())
2589
2593 SavedRegs.set(AArch64::P8);
2595 }
2596
2598 "Predicate cannot be a reserved register");
2599 }
2600
2603
2604
2605
2606
2607
2608 SavedRegs.set(AArch64::X18);
2609 }
2610
2611
2612
2613
2614 determineStackHazardSlot(MF, SavedRegs);
2615
2616
2617 unsigned CSStackSize = 0;
2618 unsigned ZPRCSStackSize = 0;
2619 unsigned PPRCSStackSize = 0;
2621 for (unsigned Reg : SavedRegs.set_bits()) {
2622 auto *RC = TRI->getMinimalPhysRegClass(MCRegister(Reg));
2623 assert(RC && "expected register class!");
2624 auto SpillSize = TRI->getSpillSize(*RC);
2625 bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
2626 bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
2627 if (IsZPR)
2628 ZPRCSStackSize += SpillSize;
2629 else if (IsPPR)
2630 PPRCSStackSize += SpillSize;
2631 else
2632 CSStackSize += SpillSize;
2633 }
2634
2635
2636
2637
2638 unsigned NumSavedRegs = SavedRegs.count();
2639
2640
2643
2644
2645
2647 CSStackSize += 8;
2648
2649
2651 SavedRegs.set(AArch64::LR);
2652
2653
2655 if (hasFP(MF) ||
2656 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2657 SavedRegs.set(AArch64::FP);
2658 SavedRegs.set(AArch64::LR);
2659 }
2660
2662 dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2663 for (unsigned Reg : SavedRegs.set_bits())
2665 dbgs() << "\n";
2666 });
2667
2668
2669 auto [ZPRLocalStackSize, PPRLocalStackSize] =
2671 uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
2673 alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
2674 bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
2675
2676
2677
2679
2680
2681
2682
2683 int64_t CalleeStackUsed = 0;
2686 if (FixedOff > CalleeStackUsed)
2687 CalleeStackUsed = FixedOff;
2688 }
2689
2690
2691 bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
2692 CalleeStackUsed) > EstimatedStackSizeLimit;
2693 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2695
2696
2697
2698
2699
2700
2701
2702 if (BigStack) {
2703 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2705 << " to get a scratch register.\n");
2706 SavedRegs.set(UnspilledCSGPR);
2707 ExtraCSSpill = UnspilledCSGPR;
2708
2709
2710
2711
2712 if (producePairRegisters(MF)) {
2713 if (UnspilledCSGPRPaired == AArch64::NoRegister) {
2714
2716 SavedRegs.reset(UnspilledCSGPR);
2717 ExtraCSSpill = AArch64::NoRegister;
2718 }
2719 } else
2720 SavedRegs.set(UnspilledCSGPRPaired);
2721 }
2722 }
2723
2724
2725
2729 unsigned Size = TRI->getSpillSize(RC);
2730 Align Alignment = TRI->getSpillAlign(RC);
2732 RS->addScavengingFrameIndex(FI);
2733 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2734 << " as the emergency spill slot.\n");
2735 }
2736 }
2737
2738
2739 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2740
2741
2742
2744 CSStackSize += 8;
2745
2748 << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
2749
2752 "Should not invalidate callee saved info");
2753
2754
2755
2759}
2760
2763 std::vector &CSI) const {
2766
2767
2768
2769
2770
2771 if (IsWindows)
2772 std::reverse(CSI.begin(), CSI.end());
2773
2774 if (CSI.empty())
2775 return true;
2776
2777
2778
2781
2786 }
2787
2788
2791 auto It =
2792 find_if(CSI, [](auto &Info) { return Info.getReg() == AArch64::LR; });
2793 if (It != CSI.end())
2794 CSI.insert(It, VGInfo);
2795 else
2796 CSI.push_back(VGInfo);
2797 }
2798
2800 int HazardSlotIndex = std::numeric_limits::max();
2801 for (auto &CS : CSI) {
2804
2805
2809 assert(HazardSlotIndex == std::numeric_limits::max() &&
2810 "Unexpected register order for hazard slot");
2812 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
2813 << "\n");
2816 }
2817
2818 unsigned Size = RegInfo->getSpillSize(*RC);
2819 Align Alignment(RegInfo->getSpillAlign(*RC));
2821 CS.setFrameIdx(FrameIdx);
2823
2824
2826 Reg == AArch64::FP) {
2830 }
2831 LastReg = Reg;
2832 }
2833
2834
2836 HazardSlotIndex == std::numeric_limits::max()) {
2838 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
2839 << "\n");
2842 }
2843
2844 return true;
2845}
2846
2850
2851
2852
2853
2855 return false;
2856
2857
2859 return false;
2861}
2862
2863
2865 int &Min, int &Max) {
2866 Min = std::numeric_limits::max();
2867 Max = std::numeric_limits::min();
2868
2870 return false;
2871
2873 for (auto &CS : CSI) {
2874 if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
2875 AArch64::PPRRegClass.contains(CS.getReg())) {
2876 assert((Max == std::numeric_limits::min() ||
2877 Max + 1 == CS.getFrameIdx()) &&
2878 "SVE CalleeSaves are not consecutive");
2879 Min = std::min(Min, CS.getFrameIdx());
2880 Max = std::max(Max, CS.getFrameIdx());
2881 }
2882 }
2883 return Min != std::numeric_limits::max();
2884}
2885
2890
2892
2893
2894
2895
2896 uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
2898 AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
2899
2900#ifndef NDEBUG
2901
2904 "SVE vectors should never be passed on the stack by value, only by "
2905 "reference.");
2906#endif
2907
2908 auto AllocateObject = [&](int FI) {
2910 ? ZPRStackTop
2911 : PPRStackTop;
2912
2913
2914
2915
2917 if (Alignment > Align(16))
2919 "Alignment of scalable vectors > 16 bytes is not yet supported");
2920
2922 StackTop = alignTo(StackTop, Alignment);
2923
2924 assert(StackTop < (uint64_t)std::numeric_limits<int64_t>::max() &&
2925 "SVE StackTop far too large?!");
2926
2927 int64_t Offset = -int64_t(StackTop);
2930
2932 };
2933
2934
2935 int MinCSFrameIndex, MaxCSFrameIndex;
2937 for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
2938 AllocateObject(FI);
2939 }
2940
2941
2942 PPRStackTop = alignTo(PPRStackTop, Align(16U));
2943 ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
2944
2945
2947
2948
2949
2950 int StackProtectorFI = -1;
2954 ObjectsToAllocate.push_back(StackProtectorFI);
2955 }
2956
2960 continue;
2961
2964 continue;
2965
2966 ObjectsToAllocate.push_back(FI);
2967 }
2968
2969
2970 for (unsigned FI : ObjectsToAllocate)
2971 AllocateObject(FI);
2972
2973 PPRStackTop = alignTo(PPRStackTop, Align(16U));
2974 ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
2975
2977 AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
2978
2979 return SVEStack;
2980}
2981
2985 "Upwards growing stack unsupported");
2986
2988
2989
2990
2992 return;
2993
2996
2997
2998
3000 int64_t CurrentOffset =
3004 int FrameIndex = H.CatchObj.FrameIndex;
3005 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
3006 CurrentOffset =
3010 }
3011 }
3012 }
3013
3014
3015
3016 int64_t UnwindHelpOffset = alignTo(CurrentOffset + 8, Align(16));
3017 assert(UnwindHelpOffset == getFixedObjectSize(MF, AFI, true,
3018 false) &&
3019 "UnwindHelpOffset must be at the start of the fixed object area");
3020 int UnwindHelpFI = MFI.CreateFixedObject( 8, -UnwindHelpOffset,
3021 false);
3022 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
3023
3028
3029
3030
3032 RS->enterBasicBlockEnd(MBB);
3033 RS->backward(MBBI);
3034 Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
3035 assert(DstReg && "There must be a free register after frame setup");
3042}
3043
3044namespace {
3045struct TagStoreInstr {
3050};
3051
3052class TagStoreEdit {
3053 MachineFunction *MF;
3054 MachineBasicBlock *MBB;
3055 MachineRegisterInfo *MRI;
3056
3058
3060
3061
3062
3064 StackOffset FrameRegOffset;
3065 int64_t Size;
3066
3067
3068 std::optional<int64_t> FrameRegUpdate;
3069
3070 unsigned FrameRegUpdateFlags;
3071
3072
3073 bool ZeroData;
3075
3078
3079public:
3080 TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
3081 : MBB(MBB), ZeroData(ZeroData) {
3084 }
3085
3086
3087 void addInstruction(TagStoreInstr I) {
3089 TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
3090 "Non-adjacent tag store instructions.");
3092 }
3093 void clear() { TagStores.clear(); }
3094
3095
3096
3098 const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
3099};
3100
3104
3105 const int64_t kMinOffset = -256 * 16;
3106 const int64_t kMaxOffset = 255 * 16;
3107
3109 int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
3110 if (BaseRegOffsetBytes < kMinOffset ||
3111 BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
3112
3113
3114
3115 BaseRegOffsetBytes % 16 != 0) {
3116 Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3120 BaseRegOffsetBytes = 0;
3121 }
3122
3124 while (Size) {
3125 int64_t InstrSize = (Size > 16) ? 32 : 16;
3126 unsigned Opcode =
3127 InstrSize == 16
3128 ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
3130 assert(BaseRegOffsetBytes % 16 == 0);
3132 .addReg(AArch64::SP)
3134 .addImm(BaseRegOffsetBytes / 16)
3136
3137
3138 if (BaseRegOffsetBytes == 0)
3139 LastI = I;
3140 BaseRegOffsetBytes += InstrSize;
3141 Size -= InstrSize;
3142 }
3143
3144 if (LastI)
3146}
3147
3151
3153 ? FrameReg
3154 : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3155 Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3156
3158
3159 int64_t LoopSize = Size;
3160
3161
3162 if (FrameRegUpdate && *FrameRegUpdate)
3163 LoopSize -= LoopSize % 32;
3165 TII->get(ZeroData ? AArch64::STZGloop_wback
3166 : AArch64::STGloop_wback))
3172 if (FrameRegUpdate)
3173 LoopI->setFlags(FrameRegUpdateFlags);
3174
3175 int64_t ExtraBaseRegUpdate =
3176 FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
3177 LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
3178 << ", Size=" << Size
3179 << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
3180 << ", FrameRegUpdate=" << FrameRegUpdate
3181 << ", FrameRegOffset.getFixed()="
3182 << FrameRegOffset.getFixed() << "\n");
3183 if (LoopSize < Size) {
3184 assert(FrameRegUpdate);
3186
3187 int64_t STGOffset = ExtraBaseRegUpdate + 16;
3188 assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
3189 "STG immediate out of range");
3191 TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
3195 .addImm(STGOffset / 16)
3198 } else if (ExtraBaseRegUpdate) {
3199
3200 int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
3201 assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
3204 TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
3207 .addImm(AddSubOffset)
3210 }
3211}
3212
3213
3214
3215
3217 int64_t Size, int64_t *TotalOffset) {
3219 if ((MI.getOpcode() == AArch64::ADDXri ||
3220 MI.getOpcode() == AArch64::SUBXri) &&
3221 MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
3223 int64_t Offset = MI.getOperand(2).getImm() << Shift;
3224 if (MI.getOpcode() == AArch64::SUBXri)
3227
3228
3229
3230
3231
3232
3233
3234
3235 const int64_t kMaxOffset = 4080 - 16;
3236
3237 const int64_t kMinOffset = -4095;
3238 if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
3239 PostOffset % 16 == 0) {
3240 *TotalOffset = Offset;
3241 return true;
3242 }
3243 }
3244 return false;
3245}
3246
3249 MemRefs.clear();
3250 for (auto &TS : TSE) {
3252
3253
3254 if (MI->memoperands_empty()) {
3255 MemRefs.clear();
3256 return;
3257 }
3258 MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
3259 }
3260}
3261
3264 bool TryMergeSPUpdate) {
3265 if (TagStores.empty())
3266 return;
3267 TagStoreInstr &FirstTagStore = TagStores[0];
3268 TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
3269 Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
3270 DL = TagStores[0].MI->getDebugLoc();
3271
3274 *MF, FirstTagStore.Offset, false ,
3276 false, true);
3277 FrameReg = Reg;
3278 FrameRegUpdate = std::nullopt;
3279
3280 mergeMemRefs(TagStores, CombinedMemRefs);
3281
3283 dbgs() << "Replacing adjacent STG instructions:\n";
3284 for (const auto &Instr : TagStores) {
3286 }
3287 });
3288
3289
3290
3293 if (TagStores.size() < 2)
3294 return;
3295 emitUnrolled(InsertI);
3296 } else {
3298 int64_t TotalOffset = 0;
3299 if (TryMergeSPUpdate) {
3300
3301
3302
3303
3304
3305 if (InsertI != MBB->end() &&
3306 canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
3307 &TotalOffset)) {
3308 UpdateInstr = &*InsertI++;
3309 LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
3310 << *UpdateInstr);
3311 }
3312 }
3313
3314 if (!UpdateInstr && TagStores.size() < 2)
3315 return;
3316
3317 if (UpdateInstr) {
3318 FrameRegUpdate = TotalOffset;
3319 FrameRegUpdateFlags = UpdateInstr->getFlags();
3320 }
3321 emitLoop(InsertI);
3322 if (UpdateInstr)
3324 }
3325
3326 for (auto &TS : TagStores)
3327 TS.MI->eraseFromParent();
3328}
3329
3331 int64_t &Size, bool &ZeroData) {
3334
3335 unsigned Opcode = MI.getOpcode();
3336 ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
3337 Opcode == AArch64::STZ2Gi);
3338
3339 if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
3340 if (.getOperand(0).isDead() ||
.getOperand(1).isDead())
3341 return false;
3342 if (.getOperand(2).isImm() ||
.getOperand(3).isFI())
3343 return false;
3345 Size = MI.getOperand(2).getImm();
3346 return true;
3347 }
3348
3349 if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
3351 else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
3353 else
3354 return false;
3355
3356 if (MI.getOperand(0).getReg() != AArch64::SP || .getOperand(1).isFI())
3357 return false;
3358
3360 16 * MI.getOperand(2).getImm();
3361 return true;
3362}
3363
3364
3365
3366
3367
3368
3369
3373 bool FirstZeroData;
3379 return II;
3380 if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
3381 return II;
3382
3385
3386 constexpr int kScanLimit = 10;
3389 NextI != E && Count < kScanLimit; ++NextI) {
3391 bool ZeroData;
3393
3394
3395
3396
3397
3398 if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
3399 if (ZeroData != FirstZeroData)
3400 break;
3402 continue;
3403 }
3404
3405
3406
3407 if (.isTransient())
3409
3410
3413 break;
3414
3415
3416 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() || MI.isCall())
3417 break;
3418 }
3419
3420
3422
3423
3424
3425
3426
3427
3428
3429
3430
3432 LiveRegs.addLiveOuts(*MBB);
3435 if (MI == InsertI)
3436 break;
3437 LiveRegs.stepBackward(*I);
3438 }
3439 InsertI++;
3440 if (LiveRegs.contains(AArch64::NZCV))
3441 return InsertI;
3442
3444 [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
3445 return Left.Offset < Right.Offset;
3446 });
3447
3448
3449 int64_t CurOffset = Instrs[0].Offset;
3450 for (auto &Instr : Instrs) {
3451 if (CurOffset > Instr.Offset)
3452 return NextI;
3453 CurOffset = Instr.Offset + Instr.Size;
3454 }
3455
3456
3457
3458 TagStoreEdit TSE(MBB, FirstZeroData);
3459 std::optional<int64_t> EndOffset;
3460 for (auto &Instr : Instrs) {
3461 if (EndOffset && *EndOffset != Instr.Offset) {
3462
3463 TSE.emitCode(InsertI, TFI, false);
3464 TSE.clear();
3465 }
3466
3467 TSE.addInstruction(Instr);
3468 EndOffset = Instr.Offset + Instr.Size;
3469 }
3470
3472
3473 TSE.emitCode(
3474 InsertI, TFI,
3476
3477 return InsertI;
3478}
3479}
3480
3483 for (auto &BB : MF)
3486 II = tryMergeAdjacentSTG(II, this, RS);
3487 }
3488
3489
3490
3491
3493 shouldSignReturnAddressEverywhere(MF))
3495}
3496
3497
3498
3499
3502 bool IgnoreSPUpdates) const {
3504 if (IgnoreSPUpdates) {
3505 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
3507 FrameReg = AArch64::SP;
3509 }
3510
3511
3516
3517 FrameReg = AArch64::SP;
3519}
3520
3521
3522
3527
3528
3529
3532
3533 unsigned CSSize =
3535
3538}
3539
3540namespace {
3541struct FrameObject {
3542 bool IsValid = false;
3543
3544 int ObjectIndex = 0;
3545
3546 int GroupIndex = -1;
3547
3548 bool ObjectFirst = false;
3549
3550
3551 bool GroupFirst = false;
3552
3553
3554
3556 enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
3557};
3558
3559class GroupBuilder {
3560 SmallVector<int, 8> CurrentMembers;
3561 int NextGroupIndex = 0;
3562 std::vector &Objects;
3563
3564public:
3565 GroupBuilder(std::vector &Objects) : Objects(Objects) {}
3566 void AddMember(int Index) { CurrentMembers.push_back(Index); }
3567 void EndCurrentGroup() {
3568 if (CurrentMembers.size() > 1) {
3569
3570
3571
3573 for (int Index : CurrentMembers) {
3574 Objects[Index].GroupIndex = NextGroupIndex;
3576 }
3578 NextGroupIndex++;
3579 }
3580 CurrentMembers.clear();
3581 }
3582};
3583
3584bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606 return std::make_tuple(.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
3607 A.GroupIndex, A.ObjectIndex) <
3608 std::make_tuple(.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
3609 B.GroupIndex, B.ObjectIndex);
3610}
3611}
3612
3616
3618 ObjectsToAllocate.empty())
3619 return;
3620
3623 for (auto &Obj : ObjectsToAllocate) {
3624 FrameObjects[Obj].IsValid = true;
3625 FrameObjects[Obj].ObjectIndex = Obj;
3626 }
3627
3628
3629
3630 GroupBuilder GB(FrameObjects);
3631 for (auto &MBB : MF) {
3633 if (MI.isDebugInstr())
3634 continue;
3635
3638 if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
3641 FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
3642 else
3643 FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
3644 }
3645 }
3646
3648 switch (MI.getOpcode()) {
3649 case AArch64::STGloop:
3650 case AArch64::STZGloop:
3652 break;
3653 case AArch64::STGi:
3654 case AArch64::STZGi:
3655 case AArch64::ST2Gi:
3656 case AArch64::STZ2Gi:
3658 break;
3659 default:
3661 }
3662
3663 int TaggedFI = -1;
3666 if (MO.isFI()) {
3669 FrameObjects[FI].IsValid)
3670 TaggedFI = FI;
3671 }
3672 }
3673
3674
3675
3676 if (TaggedFI >= 0)
3677 GB.AddMember(TaggedFI);
3678 else
3679 GB.EndCurrentGroup();
3680 }
3681
3682 GB.EndCurrentGroup();
3683 }
3684
3687 FrameObject::AccessHazard;
3688
3689 for (auto &Obj : FrameObjects)
3690 if (!Obj.Accesses ||
3691 Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
3692 Obj.Accesses = FrameObject::AccessGPR;
3693 }
3694
3695
3696
3697
3698
3700 if (TBPI) {
3701 FrameObjects[*TBPI].ObjectFirst = true;
3702 FrameObjects[*TBPI].GroupFirst = true;
3703 int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
3704 if (FirstGroupIndex >= 0)
3705 for (FrameObject &Object : FrameObjects)
3706 if (Object.GroupIndex == FirstGroupIndex)
3707 Object.GroupFirst = true;
3708 }
3709
3711
3712 int i = 0;
3713 for (auto &Obj : FrameObjects) {
3714
3715 if (!Obj.IsValid)
3716 break;
3717 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3718 }
3719
3721 dbgs() << "Final frame order:\n";
3722 for (auto &Obj : FrameObjects) {
3723 if (!Obj.IsValid)
3724 break;
3725 dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
3726 if (Obj.ObjectFirst)
3727 dbgs() << ", first";
3728 if (Obj.GroupFirst)
3729 dbgs() << ", group-first";
3730 dbgs() << "\n";
3731 }
3732 });
3733}
3734
3735
3736
3737
3738
3740AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
3748
3751 MF.insert(MBBInsertPoint, LoopMBB);
3753 MF.insert(MBBInsertPoint, ExitMBB);
3754
3755
3756
3760
3761 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
3762 .addReg(AArch64::XZR)
3763 .addReg(AArch64::SP)
3766
3767 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
3768 AArch64::XZR)
3769 .addReg(AArch64::SP)
3773
3774 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
3778
3781
3784 MBB.addSuccessor(LoopMBB);
3785
3787
3788 return ExitMBB->begin();
3789}
3790
3791void AArch64FrameLowering::inlineStackProbeFixed(
3796 const AArch64InstrInfo *TII =
3797 MF.getSubtarget().getInstrInfo();
3798 AArch64FunctionInfo *AFI = MF.getInfo();
3801
3803 int64_t ProbeSize = MF.getInfo()->getStackProbeSize();
3804 int64_t NumBlocks = FrameSize / ProbeSize;
3805 int64_t ResidualSize = FrameSize % ProbeSize;
3806
3807 LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
3808 << NumBlocks << " blocks of " << ProbeSize
3809 << " bytes, plus " << ResidualSize << " bytes\n");
3810
3811
3812
3814 for (int i = 0; i < NumBlocks; ++i) {
3815
3816
3820 EmitAsyncCFI && !HasFP, CFAOffset);
3822
3824 .addReg(AArch64::XZR)
3825 .addReg(AArch64::SP)
3828 }
3829 } else if (NumBlocks != 0) {
3830
3831
3835 EmitAsyncCFI && !HasFP, CFAOffset);
3837 MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
3839 if (EmitAsyncCFI && !HasFP) {
3840
3842 .buildDefCFARegister(AArch64::SP);
3843 }
3844 }
3845
3846 if (ResidualSize != 0) {
3847
3848
3852 EmitAsyncCFI && !HasFP, CFAOffset);
3854
3856 .addReg(AArch64::XZR)
3857 .addReg(AArch64::SP)
3860 }
3861 }
3862}

void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // First collect the probing pseudo-instructions: expanding them inserts
  // new instructions and basic blocks, so they cannot be replaced while
  // iterating over the block.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB)
    if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
        MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
      ToReplace.push_back(&MI);

  for (MachineInstr *MI : ToReplace) {
    if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
      Register ScratchReg = MI->getOperand(0).getReg();
      int64_t FrameSize = MI->getOperand(1).getImm();
      StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
                                               MI->getOperand(3).getImm());
      inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
                            CFAOffset);
    } else {
      assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
             "Stack probe pseudo-instruction expected");
      const AArch64InstrInfo *TII =
          MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
      Register TargetReg = MI->getOperand(0).getReg();
      (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
    }
    MI->eraseFromParent();
  }
}
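
// The two-pass shape above (collect, then expand) is the standard way to
// rewrite instructions while traversing a block, since mutating the block
// would invalidate the iterator of a single in-place walk. A self-contained
// sketch of the same pattern with hypothetical stand-in types (DemoInst and
// demoExpandPseudos are not LLVM API):
#include <list>
#include <vector>

struct DemoInst {
  int Opcode = 0;
};

void demoExpandPseudos(std::list<DemoInst> &Block, int PseudoOpcode) {
  // Pass 1: gather pointers without mutating the block.
  std::vector<DemoInst *> ToReplace;
  for (DemoInst &I : Block)
    if (I.Opcode == PseudoOpcode)
      ToReplace.push_back(&I);

  // Pass 2: mutate freely; the collected pointers stay valid because
  // std::list never relocates its nodes.
  for (DemoInst *I : ToReplace) {
    // ... the expansion would be emitted in front of *I here ...
    Block.remove_if([&](const DemoInst &X) { return &X == I; });
  }
}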

// One stack-frame access, recorded for the stack-hazard remarks below.
struct StackAccess {
  enum AccessType {
    NotAccessed = 0, // Stack object not accessed by load/store instructions.
    GPR = 1 << 0,    // A general purpose register.
    PPR = 1 << 1,    // A predicate register.
    FPR = 1 << 2,    // A floating point, Neon or SVE register.
  };

  int Idx;
  StackOffset Offset;
  int64_t Size;
  unsigned AccessTypes;

  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}

  bool operator<(const StackAccess &Rhs) const {
    return std::make_tuple(start(), Idx) <
           std::make_tuple(Rhs.start(), Rhs.Idx);
  }

  bool isCPU() const {
    // Predicate register load and store instructions execute on the CPU.
    return AccessTypes & (AccessType::GPR | AccessType::PPR);
  }
  bool isSME() const { return AccessTypes & AccessType::FPR; }
  bool isMixed() const { return isCPU() && isSME(); }

  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
  int64_t end() const { return start() + Size; }

  std::string getTypeString() const {
    switch (AccessTypes) {
    case AccessType::FPR:
      return "FPR";
    case AccessType::PPR:
      return "PPR";
    case AccessType::GPR:
      return "GPR";
    case AccessType::NotAccessed:
      return "NA";
    default:
      return "Mixed";
    }
  }

  void print(raw_ostream &OS) const {
    OS << getTypeString() << " stack object at [SP"
       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
    if (Offset.getScalable())
      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
         << " * vscale";
    OS << "]";
  }
};

static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}

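// Usage sketch for the struct above (hypothetical values, outside any pass):
// ordering and adjacency work on start() = getFixed() + getScalable(), i.e.
// scalable bytes are counted as if vscale were 1, which is sufficient for
// detecting neighbouring objects.
//
//   StackAccess SA;
//   SA.Offset = StackOffset::get(/*Fixed=*/-16, /*Scalable=*/-16);
//   SA.Size = 16;
//   SA.AccessTypes = StackAccess::FPR;
//   // SA.start() == -32, SA.end() == -16, SA.isSME() == true
//   // SA.print(dbgs()) writes: "FPR stack object at [SP-16-16 * vscale]"
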
void AArch64FrameLowering::emitRemarks(
    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody())
    return;

  unsigned StackHazardSize = getStackHazardSize(MF);
  const uint64_t HazardSize =
      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;

  if (HazardSize == 0)
    return;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Bail early if there are no stack objects.
  if (!MFI.hasStackObjects())
    return;

  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());

  size_t NumFPLdSt = 0;
  size_t NumNonFPLdSt = 0;

  // Collect stack accesses via Load/Store instructions.
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
        continue;
      for (MachineMemOperand *MMO : MI.memoperands()) {
        std::optional<int> FI = getMMOFrameID(MMO, MFI);
        if (FI && !MFI.isDeadObjectIndex(*FI)) {
          int FrameIdx = *FI;

          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
            StackAccesses[ArrIdx].Idx = FrameIdx;
            StackAccesses[ArrIdx].Offset =
                getFrameIndexReferenceFromSP(MF, FrameIdx);
            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
          }

          unsigned RegTy = StackAccess::AccessType::GPR;
          if (MFI.hasScalableStackID(FrameIdx))
            RegTy = isPPRAccess(MI) ? StackAccess::PPR : StackAccess::FPR;
          else if (AArch64InstrInfo::isFpOrNEON(MI))
            RegTy = StackAccess::FPR;

          StackAccesses[ArrIdx].AccessTypes |= RegTy;

          if (RegTy == StackAccess::FPR)
            ++NumFPLdSt;
          else
            ++NumNonFPLdSt;
        }
      }
    }
  }

  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
    return;

  llvm::sort(StackAccesses);
  llvm::erase_if(StackAccesses, [](const StackAccess &S) {
    return S.AccessTypes == StackAccess::NotAccessed;
  });

  SmallVector<const StackAccess *> MixedObjects;
  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;

  if (StackAccesses.front().isMixed())
    MixedObjects.push_back(&StackAccesses.front());

  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
       It != End; ++It) {
    const auto &First = *It;
    const auto &Second = *(It + 1);

    if (Second.isMixed())
      MixedObjects.push_back(&Second);

    if ((First.isSME() && Second.isCPU()) ||
        (First.isCPU() && Second.isSME())) {
      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
      if (Distance < HazardSize)
        HazardPairs.emplace_back(&First, &Second);
    }
  }

  auto EmitRemark = [&](llvm::StringRef Str) {
    ORE->emit([&]() {
      auto R = MachineOptimizationRemarkAnalysis(
          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
    });
  };

  for (const auto &P : HazardPairs)
    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());

  for (const auto *Obj : MixedObjects)
    EmitRemark(
        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
}
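
// Putting it together: for a function 'foo' in which an FPR spill slot ends
// within HazardSize bytes of a GPR slot, the code above emits analysis
// remarks that render roughly as follows (offsets depend on the actual frame
// layout; surface them with a remark flag matching the "sme" pass name, e.g.
// clang -Rpass-analysis=sme):
//
//   remark: stack hazard in 'foo': FPR stack object at [SP-16-16 * vscale]
//   is too close to GPR stack object at [SP-8]
//
//   remark: stack hazard in 'foo': Mixed stack object at [SP-32] accessed by
//   both GP and FP instructions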