LLVM: lib/Target/AArch64/AArch64FrameLowering.cpp Source File
249#include <cassert>
250#include <cstdint>
251#include <iterator>
252#include <optional>
253#include <vector>
254
255using namespace llvm;
256
257#define DEBUG_TYPE "frame-info"
258
260 cl::desc("enable use of redzone on AArch64"),
262
264 "stack-tagging-merge-settag",
265 cl::desc("merge settag instruction in function epilog"), cl::init(true),
267
269 cl::desc("sort stack allocations"),
271
273 "homogeneous-prolog-epilog", cl::Hidden,
274 cl::desc("Emit homogeneous prologue and epilogue for the size "
275 "optimization (default = off)"));
276
277
281
285
287 "aarch64-disable-multivector-spill-fill",
288 cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
290
291STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
292
293
294
295
296
297
298
303 bool IsTailCallReturn = (MBB.end() != MBBI)
305 : false;
306
307 int64_t ArgumentPopSize = 0;
308 if (IsTailCallReturn) {
310
311
312
313
314 ArgumentPopSize = StackAdjust.getImm();
315 } else {
316
317
318
319
321 }
322
323 return ArgumentPopSize;
324}
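// --- Illustrative sketch, not part of the original source ---
// getArgumentStackToRestore() above returns the tail-call return's explicit
// stack-adjust immediate when the terminator is a tail call, and otherwise
// the calling convention's fixed pop amount. A standalone model of that
// decision (the parameter names here are hypothetical):
static int64_t sketchArgumentPopSize(bool IsTailCallReturn,
                                     int64_t StackAdjustImm,
                                     int64_t CalleePoppedBytes) {
  // Tail calls carry the adjustment on the instruction itself; for a plain
  // return the callee pops only what the CC mandates (0 for most CCs).
  return IsTailCallReturn ? StackAdjustImm : CalleePoppedBytes;
}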
325
330
331
332
333
334bool AArch64FrameLowering::homogeneousPrologEpilog(
337 return false;
339 return false;
341 return false;
342
343
345 return false;
346
348 return false;
349
350
354 return false;
356 return false;
357
359 if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
360 return false;
361
362
363
364
366 unsigned NumGPRs = 0;
367 for (unsigned I = 0; CSRegs[I]; ++I) {
369 if (Reg == AArch64::LR) {
370 assert(CSRegs[I + 1] == AArch64::FP);
371 if (NumGPRs % 2 != 0)
372 return false;
373 break;
374 }
375 if (AArch64::GPR64RegClass.contains(Reg))
376 ++NumGPRs;
377 }
378
379 return true;
380}
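// --- Illustrative sketch, not part of the original source ---
// The loop above walks the null-terminated CSR list and rejects the
// homogeneous prolog/epilog helpers when an odd number of GPRs precedes the
// (LR, FP) pair, since the helpers only save registers two at a time. A
// standalone model over a plain array of "is GPR64" flags:
static bool sketchGprsFormPairsBeforeLR(const bool *IsGPR64BeforeLR,
                                        unsigned NumRegsBeforeLR) {
  unsigned NumGPRs = 0;
  for (unsigned I = 0; I != NumRegsBeforeLR; ++I)
    if (IsGPR64BeforeLR[I])
      ++NumGPRs;
  return NumGPRs % 2 == 0; // every GPR needs a partner for STP/LDP
}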
381
382
383bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
385}
386
387
388
389
390
392
393
394
395
397
398
399
402 if (MI.isDebugInstr() || MI.isPseudo() ||
403 MI.getOpcode() == AArch64::ADDXri ||
404 MI.getOpcode() == AArch64::ADDSXri)
405 continue;
406
408 if (!MO.isFI())
409 continue;
410
414 return 0;
415 }
416 }
417 }
419}
420
424}
425
426
427
430 bool IsFunclet) {
431 if (!IsWin64 || IsFunclet) {
433 } else {
436 Attribute::SwiftAsync))
437 report_fatal_error("cannot generate ABI-changing tail call for Win64");
438
440
441 const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
443 alignTo(VarArgsArea + UnwindHelpObject, 16);
444 }
445}
446
447
451}
452
455 return false;
456
457
458
460 const unsigned RedZoneSize =
462 if (!RedZoneSize)
463 return false;
464
468
469
470
471
472
473 bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
475 !Subtarget.hasSVE();
476
477 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
479}
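// --- Illustrative sketch, not part of the original source ---
// canUseRedZone() above lets small leaf frames skip the SP adjustment
// entirely: locals live in the red zone below SP. Worked example, assuming
// the usual 128-byte AArch64 red zone: 96 bytes of locals in a function
// with no calls, no frame pointer, and no SVE fits (96 <= 128), while 160
// bytes does not, so a real "sub sp, sp, #N" must be emitted. The core
// condition, reduced to its essentials:
static bool sketchFitsInRedZone(uint64_t LocalBytes, bool HasCalls,
                                bool HasFP, uint64_t RedZoneSize) {
  return !HasCalls && !HasFP && LocalBytes <= RedZoneSize;
}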
480
481
482
486
487
488
489
491 return true;
492
494 return true;
498 return true;
499
500
501
502
503
504
505
506
509 return true;
510
511 return false;
512}
513
514
515
516
517
518
521
522
523
524
525
527}
528
538 unsigned Opc = I->getOpcode();
539 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
540 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
541
543 int64_t Amount = I->getOperand(0).getImm();
545 if (!IsDestroy)
546 Amount = -Amount;
547
548
549
550
551 if (CalleePopAmount == 0) {
552
553
554
555
556
557
558
559
560
561
562 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
563
566
567
568
569
570
571
573 "non-reserved call frame without var sized objects?");
576 inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
577 } else {
580 }
581 }
582 } else if (CalleePopAmount != 0) {
583
584
585 assert(CalleePopAmount < 0xffffff && "call frame too large");
588 }
590}
591
592void AArch64FrameLowering::emitCalleeSavedGPRLocations(
598 bool LocallyStreaming =
599 Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
600
602 if (CSI.empty())
603 return;
604
609
610 for (const auto &Info : CSI) {
611 unsigned FrameIdx = Info.getFrameIdx();
613 continue;
614
615 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
616 int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
618
619
620
621
622 if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
623 (!LocallyStreaming &&
624 DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
625 continue;
626
632 }
633}
634
635void AArch64FrameLowering::emitCalleeSavedSVELocations(
639
640
642 if (CSI.empty())
643 return;
644
650
651 for (const auto &Info : CSI) {
653 continue;
654
655
656
657 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
658 unsigned Reg = Info.getReg();
660 continue;
661
665
670 }
671}
672
676 unsigned DwarfReg) {
677 unsigned CFIIndex =
680}
681
684
688 const auto &TRI =
691
692 const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
694
695
698 nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
700
701
702 if (MFI.shouldSignReturnAddress(MF)) {
703 auto CFIInst = MFI.branchProtectionPAuthLR()
708 }
709
710
711 if (MFI.needsShadowCallStackPrologueEpilogue(MF))
713 TRI.getDwarfRegNum(AArch64::X18, true));
714
715
716 const std::vector<CalleeSavedInfo> &CSI =
718 for (const auto &Info : CSI) {
719 unsigned Reg = Info.getReg();
720 if (!TRI.regNeedsCFI(Reg, Reg))
721 continue;
723 TRI.getDwarfRegNum(Reg, true));
724 }
725}
726
729 bool SVE) {
732
734 if (CSI.empty())
735 return;
736
741
742 for (const auto &Info : CSI) {
743 if (SVE !=
745 continue;
746
747 unsigned Reg = Info.getReg();
748 if (SVE &&
750 continue;
751
752 if (!Info.isRestored())
753 continue;
754
756 nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
760 }
761}
762
763void AArch64FrameLowering::emitCalleeSavedGPRRestores(
766}
767
768void AArch64FrameLowering::emitCalleeSavedSVERestores(
771}
772
773
774
776 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
777 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
778}
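// --- Illustrative note, not part of the original source ---
// upperBound() above converts a mixed fixed+scalable StackOffset into a
// conservative byte count by assuming the architectural maximum of 16
// bytes per scalable byte (vscale <= 16, i.e. 2048-bit SVE registers).
// Worked example: { Fixed = 32, Scalable = 48 } is bounded by
//   48 * 16 + 32 = 800 bytes,
// which is what the stack-probing code below must assume when deciding
// whether an allocation can skip the probing loop. As a plain function:
static int64_t sketchUpperBound(int64_t Fixed, int64_t Scalable) {
  return Scalable * 16 + Fixed; // worst case over all legal vscale values
}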
779
780void AArch64FrameLowering::allocateStackSpace(
782 int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
783 bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
784 bool FollowupAllocs) const {
785
786 if (!AllocSize)
787 return;
788
795
797 const uint64_t AndMask = ~(MaxAlign - 1);
798
800 Register TargetReg = RealignmentPadding
802 : AArch64::SP;
803
806 EmitCFI, InitialOffset);
807
808 if (RealignmentPadding) {
809
815
816
817
818 assert(!NeedsWinCFI);
819 }
820 return;
821 }
822
823
824
825
826
827
828
829 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
831 assert(ScratchReg != AArch64::NoRegister);
837
838
839
840
841 if (FollowupAllocs) {
842
844 .addReg(AArch64::XZR)
848 }
849
850 return;
851 }
852
853
854
855
856
858 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
859 Register ScratchReg = RealignmentPadding
861 : AArch64::SP;
862 assert(ScratchReg != AArch64::NoRegister);
863
866 EmitCFI, InitialOffset);
867 if (RealignmentPadding) {
868
874 }
875 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
877
879 .addReg(AArch64::XZR)
883 }
884 return;
885 }
886
887
888
889
891 assert(TargetReg != AArch64::NoRegister);
892
895 EmitCFI, InitialOffset);
896 if (RealignmentPadding) {
897
902 }
903
906 if (EmitCFI) {
907
908 unsigned Reg =
909 Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
910 unsigned CFIIndex =
915 }
916 if (RealignmentPadding)
918}
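// --- Illustrative sketch, not part of the original source ---
// When the worst-case allocation exceeds the probe size, allocateStackSpace
// above cannot simply move SP past multiple guard pages: each page must be
// touched so the OS can grow the stack. The shape of the emitted sequence,
// modeled as a byte-level loop (ProbeSize is the guard-page interval,
// typically 4096; the callbacks stand in for the emitted instructions):
static void sketchProbedAlloc(int64_t Bytes, int64_t ProbeSize,
                              void (*AdjustSP)(int64_t),
                              void (*StoreZeroAtSP)()) {
  while (Bytes > ProbeSize) {
    AdjustSP(-ProbeSize);  // sub sp, sp, #ProbeSize
    StoreZeroAtSP();       // str xzr, [sp] -- touch the new page
    Bytes -= ProbeSize;
  }
  if (Bytes > 0) {
    AdjustSP(-Bytes);      // final partial page
    StoreZeroAtSP();       // probed too when a follow-up alloc may extend it
  }
}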
919
921 switch (Reg.id()) {
922 default:
923
924
925 return 0;
926
927
928#define CASE(n) \
929 case AArch64::W##n: \
930 case AArch64::X##n: \
931 return AArch64::X##n
951#undef CASE
952
953
954#define CASE(n) \
955 case AArch64::B##n: \
956 case AArch64::H##n: \
957 case AArch64::S##n: \
958 case AArch64::D##n: \
959 case AArch64::Q##n: \
960 return HasSVE ? AArch64::Z##n : AArch64::Q##n
993#undef CASE
994 }
995}
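// --- Illustrative note, not part of the original source ---
// Each CASE(n) above folds the sub-register aliases of one architectural
// register onto a single canonical id. For example, CASE(0) in the FP/SIMD
// block expands to:
//
//   case AArch64::B0:
//   case AArch64::H0:
//   case AArch64::S0:
//   case AArch64::D0:
//   case AArch64::Q0:
//     return HasSVE ? AArch64::Z0 : AArch64::Q0;
//
// so zeroing "the register" zeroes the widest view the target has: the
// full Z register when SVE is available, else the 128-bit Q register.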
996
997void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
999
1001
1002
1005 DL = MBBI->getDebugLoc();
1006
1010
1015 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
1016
1018 GPRsToZero.set(XReg);
1020
1022 FPRsToZero.set(XReg);
1023 }
1024 }
1025
1027
1028
1029 for (MCRegister Reg : GPRsToZero.set_bits())
1031
1032
1033 for (MCRegister Reg : FPRsToZero.set_bits())
1035
1036 if (HasSVE) {
1038 {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
1039 AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
1040 AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
1041 AArch64::P15}) {
1042 if (RegsToZero[PReg])
1044 }
1045 }
1046}
1047
1052
1054 for (unsigned i = 0; CSRegs[i]; ++i)
1055 LiveRegs.addReg(CSRegs[i]);
1056}
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1071
1072
1073
1074
1077 return AArch64::X9;
1078
1083
1084
1087 return AArch64::X9;
1088
1089 for (unsigned Reg : AArch64::GPR64RegClass) {
1091 return Reg;
1092 }
1093 return AArch64::NoRegister;
1094}
1095
1104
1110
1111
1112 if (!LiveRegs.available(MRI, AArch64::X16) ||
1114 return false;
1115 }
1116
1117
1118
1121 return false;
1122
1123
1124
1125 if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
1126 return true;
1127
1128
1130}
1131
1133 uint64_t StackSizeInBytes) {
1136
1137
1138 return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
1139 StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
1140}
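// --- Illustrative sketch, not part of the original source ---
// windowsRequiresStackProbe() above compares the frame size against the
// stack probe size (4096 by default on Windows): a frame that could step
// over a whole guard page must call the probe helper or use inline
// probing. Worked example: a 4000-byte frame needs no probe, a 16 KiB
// frame does. The threshold test in isolation:
static bool sketchNeedsWindowsProbe(uint64_t FrameBytes,
                                    uint64_t ProbeSize /* typically 4096 */) {
  return FrameBytes >= ProbeSize;
}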
1141
1145 F.needsUnwindTableEntry();
1146}
1147
1148bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
1154 if (homogeneousPrologEpilog(MF))
1155 return false;
1156
1158 return false;
1159
1160
1161
1162
1163
1164
1165
1166
1167
1170 return false;
1171
1172
1173
1175 return false;
1176
1177 if (MFI.hasVarSizedObjects())
1178 return false;
1179
1180 if (RegInfo->hasStackRealignment(MF))
1181 return false;
1182
1183
1184
1185
1187 return false;
1188
1189
1190
1192 return false;
1193
1194 return true;
1195}
1196
1197bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
1199 if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
1200 return false;
1202 return true;
1203
1204
1205
1208 while (LastI != Begin) {
1209 --LastI;
1210 if (LastI->isTransient())
1211 continue;
1213 break;
1214 }
1215 switch (LastI->getOpcode()) {
1216 case AArch64::STGloop:
1217 case AArch64::STZGloop:
1218 case AArch64::STGi:
1219 case AArch64::STZGi:
1220 case AArch64::ST2Gi:
1221 case AArch64::STZ2Gi:
1222 return false;
1223 default:
1224 return true;
1225 }
1227}
1228
1229
1230
1234 unsigned Opc = MBBI->getOpcode();
1238 unsigned ImmIdx = MBBI->getNumOperands() - 1;
1239 int Imm = MBBI->getOperand(ImmIdx).getImm();
1243
1244 switch (Opc) {
1245 default:
1247 case AArch64::LDPDpost:
1248 Imm = -Imm;
1249 [[fallthrough]];
1250 case AArch64::STPDpre: {
1251 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1252 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
1253 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
1258 break;
1259 }
1260 case AArch64::LDPXpost:
1261 Imm = -Imm;
1262 [[fallthrough]];
1263 case AArch64::STPXpre: {
1264 Register Reg0 = MBBI->getOperand(1).getReg();
1265 Register Reg1 = MBBI->getOperand(2).getReg();
1266 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1267 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
1270 else
1271 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
1276 break;
1277 }
1278 case AArch64::LDRDpost:
1279 Imm = -Imm;
1280 [[fallthrough]];
1281 case AArch64::STRDpre: {
1282 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1283 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
1287 break;
1288 }
1289 case AArch64::LDRXpost:
1290 Imm = -Imm;
1291 [[fallthrough]];
1292 case AArch64::STRXpre: {
1293 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1294 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
1298 break;
1299 }
1300 case AArch64::STPDi:
1301 case AArch64::LDPDi: {
1302 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1303 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1304 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
1309 break;
1310 }
1311 case AArch64::STPXi:
1312 case AArch64::LDPXi: {
1313 Register Reg0 = MBBI->getOperand(0).getReg();
1314 Register Reg1 = MBBI->getOperand(1).getReg();
1315 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1316 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
1319 else
1320 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
1325 break;
1326 }
1327 case AArch64::STRXui:
1328 case AArch64::LDRXui: {
1329 int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1330 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
1334 break;
1335 }
1336 case AArch64::STRDui:
1337 case AArch64::LDRDui: {
1338 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1339 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
1343 break;
1344 }
1345 case AArch64::STPQi:
1346 case AArch64::LDPQi: {
1347 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
1348 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1349 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
1354 break;
1355 }
1356 case AArch64::LDPQpost:
1357 Imm = -Imm;
1358 [[fallthrough]];
1359 case AArch64::STPQpre: {
1360 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
1361 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
1362 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
1367 break;
1368 }
1369 }
1371 return I;
1372}
1373
1374
1376 unsigned LocalStackSize) {
1378 unsigned ImmIdx = MBBI->getNumOperands() - 1;
1379 switch (MBBI->getOpcode()) {
1380 default:
1382 case AArch64::SEH_SaveFPLR:
1383 case AArch64::SEH_SaveRegP:
1384 case AArch64::SEH_SaveReg:
1385 case AArch64::SEH_SaveFRegP:
1386 case AArch64::SEH_SaveFReg:
1387 case AArch64::SEH_SaveAnyRegQP:
1388 case AArch64::SEH_SaveAnyRegQPX:
1389 ImmOpnd = &MBBI->getOperand(ImmIdx);
1390 break;
1391 }
1392 if (ImmOpnd)
1393 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
1394}
1395
1400}
1401
1404
1405
1407 return false;
1409 if (ST.isTargetDarwin())
1410 return ST.hasSVE();
1411 return true;
1412}
1413
1415 unsigned Opc = MBBI->getOpcode();
1416 if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1417 Opc == AArch64::UBFMXri)
1418 return true;
1419
1421 if (Opc == AArch64::ORRXrr)
1422 return true;
1423
1424 if (Opc == AArch64::BL) {
1425 auto Op1 = MBBI->getOperand(0);
1426 return Op1.isSymbol() &&
1427 (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
1428 }
1429 }
1430
1431 return false;
1432}
1433
1434
1435
1436
1440 bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
1442 int CFAOffset = 0) {
1443 unsigned NewOpc;
1444
1445
1446
1447
1448
1453
1454 switch (MBBI->getOpcode()) {
1455 default:
1456 llvm_unreachable("Unexpected callee-save save/restore opcode!");
1457 case AArch64::STPXi:
1458 NewOpc = AArch64::STPXpre;
1459 break;
1460 case AArch64::STPDi:
1461 NewOpc = AArch64::STPDpre;
1462 break;
1463 case AArch64::STPQi:
1464 NewOpc = AArch64::STPQpre;
1465 break;
1466 case AArch64::STRXui:
1467 NewOpc = AArch64::STRXpre;
1468 break;
1469 case AArch64::STRDui:
1470 NewOpc = AArch64::STRDpre;
1471 break;
1472 case AArch64::STRQui:
1473 NewOpc = AArch64::STRQpre;
1474 break;
1475 case AArch64::LDPXi:
1476 NewOpc = AArch64::LDPXpost;
1477 break;
1478 case AArch64::LDPDi:
1479 NewOpc = AArch64::LDPDpost;
1480 break;
1481 case AArch64::LDPQi:
1482 NewOpc = AArch64::LDPQpost;
1483 break;
1484 case AArch64::LDRXui:
1485 NewOpc = AArch64::LDRXpost;
1486 break;
1487 case AArch64::LDRDui:
1488 NewOpc = AArch64::LDRDpost;
1489 break;
1490 case AArch64::LDRQui:
1491 NewOpc = AArch64::LDRQpost;
1492 break;
1493 }
1494
1495 if (NeedsWinCFI) {
1496 auto SEH = std::next(MBBI);
1498 SEH->eraseFromParent();
1499 }
1500
1502 int64_t MinOffset, MaxOffset;
1504 NewOpc, Scale, Width, MinOffset, MaxOffset);
1507
1508
1509
1510 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
1511 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
1512 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
1513
1514
1519 false, false, nullptr, EmitCFI,
1521
1522 return std::prev(MBBI);
1523 }
1524
1527
1528
1529 unsigned OpndIdx = 0;
1530 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
1531 ++OpndIdx)
1532 MIB.add(MBBI->getOperand(OpndIdx));
1533
1534 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
1535 "Unexpected immediate offset in first/last callee-save save/restore "
1536 "instruction!");
1537 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
1538 "Unexpected base register in callee-save save/restore instruction!");
1539 assert(CSStackSizeInc % Scale == 0);
1540 MIB.addImm(CSStackSizeInc / (int)Scale);
1541
1544
1545
1546 if (NeedsWinCFI) {
1547 *HasWinCFI = true;
1549 }
1550
1551 if (EmitCFI) {
1557 }
1558
1560}
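// --- Illustrative note, not part of the original source ---
// The conversion above folds the SP bump into the first callee-save store
// (or the last restore) by switching to a pre/post-indexed addressing
// mode. In assembly terms, for a 16-byte frame record:
//
//   sub  sp, sp, #16
//   stp  x29, x30, [sp]       =>   stp  x29, x30, [sp, #-16]!   ; STPXpre
//
// and symmetrically in the epilogue:
//
//   ldp  x29, x30, [sp]
//   add  sp, sp, #16          =>   ldp  x29, x30, [sp], #16     ; LDPXpost
//
// The folded immediate must be scale-aligned and in range (hence the
// MinOffset/MaxOffset check above); otherwise a separate SP adjustment is
// emitted instead.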
1561
1562
1563
1566 bool NeedsWinCFI,
1567 bool *HasWinCFI) {
1569 return;
1570
1571 unsigned Opc = MI.getOpcode();
1572 unsigned Scale;
1573 switch (Opc) {
1574 case AArch64::STPXi:
1575 case AArch64::STRXui:
1576 case AArch64::STPDi:
1577 case AArch64::STRDui:
1578 case AArch64::LDPXi:
1579 case AArch64::LDRXui:
1580 case AArch64::LDPDi:
1581 case AArch64::LDRDui:
1582 Scale = 8;
1583 break;
1584 case AArch64::STPQi:
1585 case AArch64::STRQui:
1586 case AArch64::LDPQi:
1587 case AArch64::LDRQui:
1588 Scale = 16;
1589 break;
1590 default:
1591 llvm_unreachable("Unexpected callee-save save/restore opcode!");
1592 }
1593
1594 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
1595 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
1596 "Unexpected base register in callee-save save/restore instruction!");
1597
1599
1600 assert(LocalStackSize % Scale == 0);
1601 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
1602
1603 if (NeedsWinCFI) {
1604 *HasWinCFI = true;
1606 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
1608 "Expecting a SEH instruction");
1610 }
1611}
1612
1615}
1616
1619}
1620
1621
1623 switch (I->getOpcode()) {
1624 default:
1625 return false;
1626 case AArch64::PTRUE_C_B:
1627 case AArch64::LD1B_2Z_IMM:
1628 case AArch64::ST1B_2Z_IMM:
1629 case AArch64::STR_ZXI:
1630 case AArch64::STR_PXI:
1631 case AArch64::LDR_ZXI:
1632 case AArch64::LDR_PXI:
1635 }
1636}
1637
1642 const DebugLoc &DL, bool NeedsWinCFI,
1643 bool NeedsUnwindInfo) {
1644
1647 .addReg(AArch64::LR)
1648 .addReg(AArch64::X18)
1651
1652
1654
1655 if (NeedsWinCFI)
1658
1659 if (NeedsUnwindInfo) {
1660
1661
1662 static const char CFIInst[] = {
1663 dwarf::DW_CFA_val_expression,
1664 18,
1665 2,
1666 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1667 static_cast<char>(-8) & 0x7f,
1668 };
1670 nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1674 }
1675}
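// --- Illustrative note, not part of the original source ---
// The escape bytes above hand-encode the DWARF rule
//   DW_CFA_val_expression x18, { DW_OP_breg18, SLEB128(-8) }
// i.e. "the previous value of x18 is the current x18 minus 8": the shadow
// call stack prologue just stored LR at [x18] and post-incremented x18 by
// 8, and the `& 0x7f` masking produces the single-byte SLEB128 encoding of
// -8. The recovery an unwinder performs, as a sketch:
static unsigned long long sketchPrevX18(unsigned long long CurX18) {
  return CurX18 - 8; // undo the "str x30, [x18], #8" post-increment
}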
1676
1682
1686 .addReg(AArch64::X18)
1689
1691 unsigned CFIIndex =
1696 }
1697}
1698
1699
1702 const DebugLoc &DL, unsigned FixedObject) {
1707
1708 const int OffsetToFirstCalleeSaveFromFP =
1712 unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
1714 nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
1718}
1719
1720#ifndef NDEBUG
1721
1722
1725
1731}
1732#endif
1733
1742
1746 bool HasFP = hasFP(MF);
1748 bool HasWinCFI = false;
1750
1752#ifndef NDEBUG
1754
1755
1757 while (NonFrameStart != End &&
1759 ++NonFrameStart;
1760
1762 if (NonFrameStart != MBB.end()) {
1764
1765 LiveRegs.removeReg(AArch64::SP);
1766 LiveRegs.removeReg(AArch64::X19);
1767 LiveRegs.removeReg(AArch64::FP);
1768 LiveRegs.removeReg(AArch64::LR);
1769
1770
1771
1772
1774 LiveRegs.removeReg(AArch64::X0);
1775 }
1776
1778 if (NonFrameStart == MBB.end())
1779 return;
1780
1783 for (auto &Op : MI.operands())
1784 if (Op.isReg() && Op.isDef())
1785 assert(!LiveRegs.contains(Op.getReg()) &&
1786 "live register clobbered by inserted prologue instructions");
1787 }
1788 });
1789#endif
1790
1792
1793
1794
1795
1797
1798
1799
1801
1803 if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
1805 MFnI.needsDwarfUnwindInfo(MF));
1806
1807 if (MFnI.shouldSignReturnAddress(MF)) {
1810 if (NeedsWinCFI)
1811 HasWinCFI = true;
1812 }
1813
1814 if (EmitCFI && MFnI.isMTETagged()) {
1817 }
1818
1819
1820
1821
1822
1827
1828
1832 if (NeedsWinCFI) {
1835 HasWinCFI = true;
1836 }
1838 .addUse(AArch64::FP)
1839 .addUse(AArch64::X16)
1841 if (NeedsWinCFI) {
1844 HasWinCFI = true;
1845 }
1846 break;
1847 }
1848 [[fallthrough]];
1849
1851
1853 .addUse(AArch64::FP)
1856 if (NeedsWinCFI) {
1859 HasWinCFI = true;
1860 }
1861 break;
1862
1864 break;
1865 }
1866 }
1867
1868
1869
1871 return;
1872
1873
1874
1876 if (TBPI)
1878 else
1880
1882
1883
1884
1885
1886
1887
1888
1889 int64_t NumBytes =
1892 assert(!HasFP && "unexpected function without stack frame but with FP");
1893 assert(!SVEStackSize &&
1894 "unexpected function without stack frame but with SVE objects");
1895
1897 if (!NumBytes)
1898 return;
1899
1900
1903 ++NumRedZoneFunctions;
1904 } else {
1908 if (EmitCFI) {
1909
1911
1917 }
1918 }
1919
1920 if (NeedsWinCFI) {
1921 HasWinCFI = true;
1924 }
1925
1926 return;
1927 }
1928
1929 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
1930 unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1931
1933
1935 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1936 bool HomPrologEpilog = homogeneousPrologEpilog(MF);
1937 if (CombineSPBump) {
1938 assert(!SVEStackSize && "Cannot combine SP bump with SVE");
1942 EmitAsyncCFI);
1943 NumBytes = 0;
1944 } else if (HomPrologEpilog) {
1945
1946 NumBytes -= PrologueSaveSize;
1947 } else if (PrologueSaveSize != 0) {
1949 MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
1950 EmitAsyncCFI);
1951 NumBytes -= PrologueSaveSize;
1952 }
1953 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1954
1955
1956
1957
1960 if (CombineSPBump &&
1961
1964 NeedsWinCFI, &HasWinCFI);
1966 }
1967
1968
1969 if (!IsFunclet && HasFP) {
1970
1972
1973 if (CombineSPBump)
1975
1977
1978
1979
1981 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1982 if (HaveInitialContext)
1984 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1987 .addUse(AArch64::SP)
1988 .addImm(FPOffset - 8)
1990 if (NeedsWinCFI) {
1991
1992
1996 HasWinCFI = true;
1997 }
1998 }
1999
2000 if (HomPrologEpilog) {
2003 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
2005 } else {
2006
2007
2008
2009
2013 if (NeedsWinCFI && HasWinCFI) {
2016
2017
2018 NeedsWinCFI = false;
2019 }
2020 }
2021 if (EmitAsyncCFI)
2023 }
2024
2025
2026
2027
2028 if (EmitAsyncCFI)
2029 emitCalleeSavedGPRLocations(MBB, MBBI);
2030
2031
2032 const bool NeedsRealignment =
2033 NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
2034 const int64_t RealignmentPadding =
2037 : 0;
2038
2040 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
2041 if (NeedsWinCFI) {
2042 HasWinCFI = true;
2043
2044
2045
2046
2047
2048 if (NumBytes >= (1 << 28))
2050 "unwinding purposes");
2051
2052 uint32_t LowNumWords = NumWords & 0xFFFF;
2054 .addImm(LowNumWords)
2059 if ((NumWords & 0xFFFF0000) != 0) {
2061 .addReg(AArch64::X15)
2062 .addImm((NumWords & 0xFFFF0000) >> 16)
2067 }
2068 } else {
2072 }
2073
2087 if (NeedsWinCFI) {
2088 HasWinCFI = true;
2091 }
2092 break;
2099 if (NeedsWinCFI) {
2100 HasWinCFI = true;
2103 }
2104
2112 if (NeedsWinCFI) {
2113 HasWinCFI = true;
2116 }
2117 break;
2118 }
2119
2125 if (NeedsWinCFI) {
2126 HasWinCFI = true;
2130 }
2131 NumBytes = 0;
2132
2133 if (RealignmentPadding > 0) {
2134 if (RealignmentPadding >= 4096) {
2137 .addImm(RealignmentPadding)
2140 .addReg(AArch64::SP)
2144 } else {
2146 .addReg(AArch64::SP)
2147 .addImm(RealignmentPadding)
2150 }
2151
2157
2158
2159
2160 assert(!NeedsWinCFI);
2161 }
2162 }
2163
2164 StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
2166
2167
2168
2170 LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
2171 << "\n");
2172
2173 CalleeSavesBegin = MBBI;
2177 CalleeSavesEnd = MBBI;
2178
2180 SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
2181 }
2182
2183
2187 allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
2188 nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
2190 CFAOffset += SVECalleeSavesSize;
2191
2192 if (EmitAsyncCFI)
2193 emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
2194
2195
2196
2198 "Cannot use redzone with stack realignment");
2200
2201
2202
2203 allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
2205 NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
2207 }
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217 if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
2219 false);
2220 if (NeedsWinCFI) {
2221 HasWinCFI = true;
2224 }
2225 }
2226
2227
2228
2229 if (NeedsWinCFI && HasWinCFI) {
2232 }
2233
2234
2235
2236
2237 if (IsFunclet && F.hasPersonalityFn()) {
2241 .addReg(AArch64::X1)
2244 }
2245 }
2246
2247 if (EmitCFI && !EmitAsyncCFI) {
2248 if (HasFP) {
2250 } else {
2254 *RegInfo, AArch64::SP, AArch64::SP, TotalSize,
2255 false));
2259 }
2260 emitCalleeSavedGPRLocations(MBB, MBBI);
2261 emitCalleeSavedSVELocations(MBB, MBBI);
2262 }
2263}
2264
2266 switch (MI.getOpcode()) {
2267 default:
2268 return false;
2269 case AArch64::CATCHRET:
2270 case AArch64::CLEANUPRET:
2271 return true;
2272 }
2273}
2274
2285 bool HasWinCFI = false;
2286 bool IsFunclet = false;
2287
2289 DL = MBBI->getDebugLoc();
2291 }
2292
2294
2297 BuildMI(MBB, MBB.getFirstTerminator(), DL,
2298 TII->get(AArch64::PAUTH_EPILOGUE))
2299 .setMIFlag(MachineInstr::FrameDestroy);
2300 if (NeedsWinCFI)
2301 HasWinCFI = true;
2302 }
2305 if (EmitCFI)
2307 if (HasWinCFI) {
2309 TII->get(AArch64::SEH_EpilogEnd))
2313 }
2314 if (NeedsWinCFI) {
2316 if (!HasWinCFI)
2318 }
2319 });
2320
2323
2324
2325
2327 return;
2328
2329
2330
2334 unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
2335
2336 int64_t AfterCSRPopSize = ArgumentStackToRestore;
2338
2339
2340
2341
2344 if (homogeneousPrologEpilog(MF, &MBB)) {
2345 assert(!NeedsWinCFI);
2347 if (LastPopI != MBB.begin()) {
2348 auto HomogeneousEpilog = std::prev(LastPopI);
2349 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
2350 LastPopI = HomogeneousEpilog;
2351 }
2352
2353
2357
2358
2359
2360 assert(AfterCSRPopSize == 0);
2361 return;
2362 }
2363 bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
2364
2365 bool CombineAfterCSRBump = false;
2366 if (!CombineSPBump && PrologueSaveSize != 0) {
2368 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
2370 Pop = std::prev(Pop);
2371
2372
2373 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
2374
2375
2376
2377 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
2379 MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
2381 } else {
2382
2383
2384
2385
2386 AfterCSRPopSize += PrologueSaveSize;
2387 CombineAfterCSRBump = true;
2388 }
2389 }
2390
2391
2392
2393
2396 while (LastPopI != Begin) {
2397 --LastPopI;
2400 ++LastPopI;
2401 break;
2402 } else if (CombineSPBump)
2404 NeedsWinCFI, &HasWinCFI);
2405 }
2406
2407 if (NeedsWinCFI) {
2408
2409
2410
2411
2412
2413
2414 BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
2416 EpilogStartI = LastPopI;
2417 --EpilogStartI;
2418 }
2419
2423
2424
2425
2426 [[fallthrough]];
2428
2429
2430
2431
2433 AArch64::FP)
2434 .addUse(AArch64::FP)
2437 if (NeedsWinCFI) {
2440 HasWinCFI = true;
2441 }
2442 break;
2443
2445 break;
2446 }
2447 }
2448
2450
2451
2452 if (CombineSPBump) {
2453 assert(!SVEStackSize && "Cannot combine SP bump with SVE");
2454
2455
2456 if (EmitCFI && hasFP(MF)) {
2458 unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
2459 unsigned CFIIndex =
2461 BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
2464 }
2465
2470 return;
2471 }
2472
2473 NumBytes -= PrologueSaveSize;
2474 assert(NumBytes >= 0 && "Negative stack allocation size!?");
2475
2476
2477
2478 StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
2481 RestoreBegin = std::prev(RestoreEnd);
2482 while (RestoreBegin != MBB.begin() &&
2484 --RestoreBegin;
2485
2487 IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
2488
2491 DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
2492 DeallocateAfter = CalleeSavedSizeAsOffset;
2493 }
2494
2495
2496 if (SVEStackSize) {
2497
2498
2499
2502
2503
2504
2508 }
2509 } else {
2511
2512
2514 MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
2516 false, false, nullptr, EmitCFI && !hasFP(MF),
2518 NumBytes = 0;
2519 }
2520
2523 false, nullptr, EmitCFI && !hasFP(MF),
2524 SVEStackSize +
2526
2529 false, nullptr, EmitCFI && !hasFP(MF),
2530 DeallocateAfter +
2532 }
2533 if (EmitCFI)
2534 emitCalleeSavedSVERestores(MBB, RestoreEnd);
2535 }
2536
2537 if (!hasFP(MF)) {
2539
2540
2541 if (RedZone && AfterCSRPopSize == 0)
2542 return;
2543
2544
2545
2546
2547
2548 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
2549 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
2550 if (NoCalleeSaveRestore)
2551 StackRestoreBytes += AfterCSRPopSize;
2552
2554 MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2558
2559
2560
2561 if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
2562 return;
2563 }
2564
2565 NumBytes = 0;
2566 }
2567
2568
2569
2570
2571
2574 MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
2577 } else if (NumBytes)
2581
2582
2583 if (EmitCFI && hasFP(MF)) {
2585 unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
2588 BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
2591 }
2592
2593
2594
2595
2596 if (AfterCSRPopSize) {
2597 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
2598 "interrupt may have clobbered");
2599
2603 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
2605 }
2606}
2607
2611}
2612
2613
2614
2615
2616
2621 MF, FI, FrameReg,
2622
2625 false);
2626}
2627
2630 int FI) const {
2631
2632
2633
2634
2635
2636
2638
2641
2642
2643
2644
2645 if (MFI.isVariableSizedObjectIndex(FI)) {
2647 }
2648
2649
2650 if (!SVEStackSize)
2652
2656 ObjectOffset);
2657 }
2658
2659 bool IsFixed = MFI.isFixedObjectIndex(FI);
2660 bool IsCSR =
2662
2664 if (!IsFixed && !IsCSR)
2665 ScalableOffset = -SVEStackSize;
2666
2668}
2669
2672 int FI) const {
2674}
2675
2677 int64_t ObjectOffset) {
2681 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
2682 unsigned FixedObject =
2685 int64_t FPAdjust =
2688}
2689
2691 int64_t ObjectOffset) {
2694}
2695
2696
2698 int FI) const {
2702 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
2705}
2706
2709 bool ForSimm) const {
2712 bool isFixed = MFI.isFixedObjectIndex(FI);
2715 PreferFP, ForSimm);
2716}
2717
2719 const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
2720 Register &FrameReg, bool PreferFP, bool ForSimm) const {
2726
2729 bool isCSR =
2731
2733
2734
2735
2736
2737
2738 bool UseFP = false;
2740
2741
2742
2743 PreferFP &= !SVEStackSize;
2744
2745
2746
2747
2748
2749 if (isFixed) {
2750 UseFP = hasFP(MF);
2751 } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
2752
2753
2754
2755 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
2756 UseFP = true;
2757 } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
2758
2759
2760
2761
2762 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
2763 PreferFP |= Offset > -FPOffset && !SVEStackSize;
2764
2765 if (FPOffset >= 0) {
2766
2767
2768 UseFP = true;
2769 } else if (MFI.hasVarSizedObjects()) {
2770
2771
2772
2773 bool CanUseBP = RegInfo->hasBasePointer(MF);
2774 if (FPOffsetFits && CanUseBP)
2775 UseFP = PreferFP;
2776 else if (!CanUseBP)
2777 UseFP = true;
2778
2779
2780
2781 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
2782
2783
2784
2785 (void) Subtarget;
2788 "Funclets should only be present on Win64");
2789 UseFP = true;
2790 } else {
2791
2792 if (FPOffsetFits && PreferFP)
2793 UseFP = true;
2794 }
2795 }
2796 }
2797
2799 ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
2800 "In the presence of dynamic stack pointer realignment, "
2801 "non-argument/CSR objects cannot be accessed through the frame pointer");
2802
2803 if (isSVE) {
2807 SVEStackSize +
2809 ObjectOffset);
2810
2813 RegInfo->hasStackRealignment(MF))) {
2814 FrameReg = RegInfo->getFrameRegister(MF);
2815 return FPOffset;
2816 }
2817
2818 FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
2820 return SPOffset;
2821 }
2822
2824 if (UseFP && !(isFixed || isCSR))
2825 ScalableOffset = -SVEStackSize;
2826 if (!UseFP && (isFixed || isCSR))
2827 ScalableOffset = SVEStackSize;
2828
2829 if (UseFP) {
2830 FrameReg = RegInfo->getFrameRegister(MF);
2832 }
2833
2834
2835 if (RegInfo->hasBasePointer(MF))
2836 FrameReg = RegInfo->getBaseRegister();
2837 else {
2838 assert(!MFI.hasVarSizedObjects() &&
2839 "Can't use SP when we have var sized objects.");
2840 FrameReg = AArch64::SP;
2841
2842
2843
2846 }
2847
2849}
2850
2852
2853
2854
2855
2856
2859}
2860
2867 Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
2870}
2871
2873 bool NeedsWinCFI, bool IsFirst,
2875
2876
2877
2878
2879
2880
2881
2882 if (Reg2 == AArch64::FP)
2883 return true;
2884 if (!NeedsWinCFI)
2885 return false;
2886 if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
2887 return false;
2888
2889
2890
2891
2892
2893 if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
2894 (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
2895 return false;
2896 return true;
2897}
2898
2899
2900
2901
2902
2904 bool UsesWinAAPCS, bool NeedsWinCFI,
2905 bool NeedsFrameRecord, bool IsFirst,
2907 if (UsesWinAAPCS)
2910
2911
2912
2913 if (NeedsFrameRecord)
2914 return Reg2 == AArch64::LR;
2915
2916 return false;
2917}
2918
2919namespace {
2920
2921struct RegPairInfo {
2922 unsigned Reg1 = AArch64::NoRegister;
2923 unsigned Reg2 = AArch64::NoRegister;
2924 int FrameIdx;
2926 enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
2928
2929 RegPairInfo() = default;
2930
2931 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
2932
2933 bool isScalable() const { return Type == PPR || Type == ZPR; }
2934};
2935
2936}
2937
2939 for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
2940 if (SavedRegs.test(PReg)) {
2941 unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
2942 return PNReg;
2943 }
2944 }
2945 return AArch64::NoRegister;
2946}
2947
2948
2952 return false;
2953
2955 bool IsLocallyStreaming =
2957
2958
2959
2960
2961 return Subtarget.hasSVE2p1() ||
2962 (Subtarget.hasSME2() &&
2963 (!IsLocallyStreaming && Subtarget.isStreaming()));
2964}
2965
2969 bool NeedsFrameRecord) {
2970
2971 if (CSI.empty())
2972 return;
2973
2980 unsigned Count = CSI.size();
2981 (void)CC;
2982
2983
2987 "Odd number of callee-saved regs to spill!");
2989 int StackFillDir = -1;
2990 int RegInc = 1;
2991 unsigned FirstReg = 0;
2992 if (NeedsWinCFI) {
2993
2994 ByteOffset = 0;
2995 StackFillDir = 1;
2996
2997
2998 RegInc = -1;
2999 FirstReg = Count - 1;
3000 }
3004
3005
3006 for (unsigned i = FirstReg; i < Count; i += RegInc) {
3007 RegPairInfo RPI;
3008 RPI.Reg1 = CSI[i].getReg();
3009
3010 if (AArch64::GPR64RegClass.contains(RPI.Reg1)) {
3011 RPI.Type = RegPairInfo::GPR;
3012 RPI.RC = &AArch64::GPR64RegClass;
3013 } else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) {
3014 RPI.Type = RegPairInfo::FPR64;
3015 RPI.RC = &AArch64::FPR64RegClass;
3016 } else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) {
3017 RPI.Type = RegPairInfo::FPR128;
3018 RPI.RC = &AArch64::FPR128RegClass;
3019 } else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) {
3020 RPI.Type = RegPairInfo::ZPR;
3021 RPI.RC = &AArch64::ZPRRegClass;
3022 } else if (AArch64::PPRRegClass.contains(RPI.Reg1)) {
3023 RPI.Type = RegPairInfo::PPR;
3024 RPI.RC = &AArch64::PPRRegClass;
3025 } else if (RPI.Reg1 == AArch64::VG) {
3026 RPI.Type = RegPairInfo::VG;
3027 RPI.RC = &AArch64::FIXED_REGSRegClass;
3028 } else {
3030 }
3031
3032
3036 ByteOffset += StackFillDir * StackHazardSize;
3037 LastReg = RPI.Reg1;
3038
3039 int Scale = TRI->getSpillSize(*RPI.RC);
3040
3042 Register NextReg = CSI[i + RegInc].getReg();
3043 bool IsFirst = i == FirstReg;
3044 switch (RPI.Type) {
3045 case RegPairInfo::GPR:
3046 if (AArch64::GPR64RegClass.contains(NextReg) &&
3048 NeedsWinCFI, NeedsFrameRecord, IsFirst,
3050 RPI.Reg2 = NextReg;
3051 break;
3052 case RegPairInfo::FPR64:
3053 if (AArch64::FPR64RegClass.contains(NextReg) &&
3055 IsFirst, TRI))
3056 RPI.Reg2 = NextReg;
3057 break;
3058 case RegPairInfo::FPR128:
3059 if (AArch64::FPR128RegClass.contains(NextReg))
3060 RPI.Reg2 = NextReg;
3061 break;
3062 case RegPairInfo::PPR:
3063 break;
3064 case RegPairInfo::ZPR:
3066 ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
3067
3068
3069 int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
3071 RPI.Reg2 = NextReg;
3072 }
3073 break;
3074 case RegPairInfo::VG:
3075 break;
3076 }
3077 }
3078
3079
3080
3081
3082
3083
3084
3085 assert((!RPI.isPaired() ||
3086 (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
3087 "Out of order callee saved regs!");
3088
3089 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
3090 RPI.Reg1 == AArch64::LR) &&
3091 "FrameRecord must be allocated together with LR");
3092
3093
3094 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
3095 RPI.Reg2 == AArch64::LR) &&
3096 "FrameRecord must be allocated together with LR");
3097
3098
3099
3103 (RPI.isPaired() &&
3104 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
3105 RPI.Reg1 + 1 == RPI.Reg2))) &&
3106 "Callee-save registers not saved as adjacent register pair!");
3107
3108 RPI.FrameIdx = CSI[i].getFrameIdx();
3109 if (NeedsWinCFI &&
3110 RPI.isPaired())
3111 RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
3112
3113 int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3114 assert(OffsetPre % Scale == 0);
3115
3116 if (RPI.isScalable())
3117 ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3118 else
3119 ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3120
3121
3122
3124 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
3125 (IsWindows && RPI.Reg2 == AArch64::LR)))
3126 ByteOffset += StackFillDir * 8;
3127
3128
3129
3130 if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
3131 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
3132 ByteOffset % 16 != 0) {
3133 ByteOffset += 8 * StackFillDir;
3135
3136
3137
3139 NeedGapToAlignStack = false;
3140 }
3141
3142 int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3143 assert(OffsetPost % Scale == 0);
3144
3145
3146 int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
3147
3148
3149
3151 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
3152 (IsWindows && RPI.Reg2 == AArch64::LR)))
3154 RPI.Offset = Offset / Scale;
3155
3156 assert((!RPI.isPaired() ||
3157 (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
3158 (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
3159 "Offset out of bounds for LDP/STP immediate");
3160
3161 auto isFrameRecord = [&] {
3162 if (RPI.isPaired())
3163 return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
3164 : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
3165
3166
3167
3168
3169
3170
3171
3172 return i > 0 && RPI.Reg1 == AArch64::FP &&
3173 CSI[i - 1].getReg() == AArch64::LR;
3174 };
3175
3176
3177
3178 if (NeedsFrameRecord && isFrameRecord())
3180
3182 if (RPI.isPaired())
3183 i += RegInc;
3184 }
3185 if (NeedsWinCFI) {
3186
3187
3188
3189
3190
3193
3194
3195 std::reverse(RegPairs.begin(), RegPairs.end());
3196 }
3197}
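// --- Illustrative sketch, not part of the original source ---
// computeCalleeSaveRegisterPairs() above greedily pairs adjacent CSRs of
// the same class so each pair shares one STP/LDP, subject to the alignment
// gap and frame-record constraints. A minimal model of the greedy step,
// assuming registers with consecutive encodings are pairable:
static unsigned sketchCountStores(const unsigned *Encodings, unsigned N) {
  unsigned Stores = 0;
  for (unsigned I = 0; I < N; ++Stores)
    // take (I, I+1) as a pair when the +1 neighbour exists, else I alone
    I += (I + 1 < N && Encodings[I + 1] == Encodings[I] + 1) ? 2 : 1;
  return Stores; // e.g. {x19, x20, x21} -> one STP + one STR = 2 stores
}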
3198
3208
3210
3212
3213
3214 MRI.freezeReservedRegs();
3215
3216 if (homogeneousPrologEpilog(MF)) {
3219
3220 for (auto &RPI : RegPairs) {
3221 MIB.addReg(RPI.Reg1);
3222 MIB.addReg(RPI.Reg2);
3223
3224
3225 if (!MRI.isReserved(RPI.Reg1))
3227 if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
3229 }
3230 return true;
3231 }
3232 bool PTrueCreated = false;
3233 for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
3234 unsigned Reg1 = RPI.Reg1;
3235 unsigned Reg2 = RPI.Reg2;
3236 unsigned StrOpc;
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248 unsigned Size = TRI->getSpillSize(*RPI.RC);
3249 Align Alignment = TRI->getSpillAlign(*RPI.RC);
3250 switch (RPI.Type) {
3251 case RegPairInfo::GPR:
3252 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
3253 break;
3254 case RegPairInfo::FPR64:
3255 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
3256 break;
3257 case RegPairInfo::FPR128:
3258 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
3259 break;
3260 case RegPairInfo::ZPR:
3261 StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
3262 break;
3263 case RegPairInfo::PPR:
3264 StrOpc = AArch64::STR_PXI;
3265 break;
3266 case RegPairInfo::VG:
3267 StrOpc = AArch64::STRXui;
3268 break;
3269 }
3270
3271 unsigned X0Scratch = AArch64::NoRegister;
3272 if (Reg1 == AArch64::VG) {
3273
3275 assert(Reg1 != AArch64::NoRegister);
3277
3278 if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
3280
3281
3290
3297 AFI->setVGIdx(RPI.FrameIdx);
3298 } else {
3303 return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3304 AArch64::X0, LiveIn.PhysReg);
3305 }))
3306 X0Scratch = Reg1;
3307
3308 if (X0Scratch != AArch64::NoRegister)
3310 .addReg(AArch64::XZR)
3314
3315 const uint32_t *RegMask = TRI->getCallPreservedMask(
3316 MF,
3323 Reg1 = AArch64::X0;
3324 AFI->setVGIdx(RPI.FrameIdx);
3325 }
3326 }
3327
3329 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
3330 dbgs() << ") -> fi#(" << RPI.FrameIdx;
3331 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
3332 dbgs() << ")\n");
3333
3334 assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
3335 "Windows unwdinding requires a consecutive (FP,LR) pair");
3336
3337
3338
3339 unsigned FrameIdxReg1 = RPI.FrameIdx;
3340 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
3341 if (NeedsWinCFI && RPI.isPaired()) {
3343 std::swap(FrameIdxReg1, FrameIdxReg2);
3344 }
3345
3346 if (RPI.isPaired() && RPI.isScalable()) {
3352 "Expects SVE2.1 or SME2 target and a predicate register");
3353#ifdef EXPENSIVE_CHECKS
3354 auto IsPPR = [](const RegPairInfo &c) {
3355 return c.Type == RegPairInfo::PPR;
3356 };
3357 auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
3358 auto IsZPR = [](const RegPairInfo &c) {
3359 return c.Type == RegPairInfo::ZPR;
3360 };
3361 auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
3362 assert(!(PPRBegin < ZPRBegin) &&
3363 "Expected callee save predicate to be handled first");
3364#endif
3365 if (!PTrueCreated) {
3366 PTrueCreated = true;
3369 }
3371 if (!MRI.isReserved(Reg1))
3373 if (!MRI.isReserved(Reg2))
3375 MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
3380 MIB.addReg(AArch64::SP)
3381 .addImm(RPI.Offset / 2)
3382
3387 if (NeedsWinCFI)
3389 } else {
3391 if (!MRI.isReserved(Reg1))
3393 if (RPI.isPaired()) {
3394 if (!MRI.isReserved(Reg2))
3400 }
3402 .addReg(AArch64::SP)
3403 .addImm(RPI.Offset)
3404
3409 if (NeedsWinCFI)
3411 }
3412
3414 if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
3416 if (RPI.isPaired())
3418 }
3419
3420 if (X0Scratch != AArch64::NoRegister)
3422 .addReg(AArch64::XZR)
3426 }
3427 return true;
3428}
3429
3438
3440 DL = MBBI->getDebugLoc();
3441
3443 if (homogeneousPrologEpilog(MF, &MBB)) {
3446 for (auto &RPI : RegPairs) {
3449 }
3450 return true;
3451 }
3452
3453
3454 auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
3455 auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
3456 auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
3457 std::reverse(PPRBegin, PPREnd);
3458 auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
3459 auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
3460 auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
3461 std::reverse(ZPRBegin, ZPREnd);
3462
3463 bool PTrueCreated = false;
3464 for (const RegPairInfo &RPI : RegPairs) {
3465 unsigned Reg1 = RPI.Reg1;
3466 unsigned Reg2 = RPI.Reg2;
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476 unsigned LdrOpc;
3477 unsigned Size = TRI->getSpillSize(*RPI.RC);
3478 Align Alignment = TRI->getSpillAlign(*RPI.RC);
3479 switch (RPI.Type) {
3480 case RegPairInfo::GPR:
3481 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
3482 break;
3483 case RegPairInfo::FPR64:
3484 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
3485 break;
3486 case RegPairInfo::FPR128:
3487 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
3488 break;
3489 case RegPairInfo::ZPR:
3490 LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
3491 break;
3492 case RegPairInfo::PPR:
3493 LdrOpc = AArch64::LDR_PXI;
3494 break;
3495 case RegPairInfo::VG:
3496 continue;
3497 }
3499 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
3500 dbgs() << ") -> fi#(" << RPI.FrameIdx;
3501 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
3502 dbgs() << ")\n");
3503
3504
3505
3506
3507 unsigned FrameIdxReg1 = RPI.FrameIdx;
3508 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
3509 if (NeedsWinCFI && RPI.isPaired()) {
3511 std::swap(FrameIdxReg1, FrameIdxReg2);
3512 }
3513
3515 if (RPI.isPaired() && RPI.isScalable()) {
3520 "Expects SVE2.1 or SME2 target and a predicate register");
3521#ifdef EXPENSIVE_CHECKS
3522 assert(!(PPRBegin < ZPRBegin) &&
3523 "Expected callee save predicate to be handled first");
3524#endif
3525 if (!PTrueCreated) {
3526 PTrueCreated = true;
3529 }
3531 MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
3537 MIB.addReg(AArch64::SP)
3538 .addImm(RPI.Offset / 2)
3539
3544 if (NeedsWinCFI)
3546 } else {
3548 if (RPI.isPaired()) {
3553 }
3555 MIB.addReg(AArch64::SP)
3556 .addImm(RPI.Offset)
3557
3562 if (NeedsWinCFI)
3564 }
3565 }
3566 return true;
3567}
3568
3569
3572 auto *PSV =
3573 dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
3574 if (PSV)
3575 return std::optional<int>(PSV->getFrameIndex());
3576
3580 FI++)
3582 return FI;
3583 }
3584 }
3585
3586 return std::nullopt;
3587}
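// --- Illustrative note, not part of the original source ---
// getMMOFrameID() above recovers the frame index behind a memory operand:
// directly from a FixedStackPseudoSourceValue when one is attached, and
// otherwise by matching the operand's IR value against each frame object's
// underlying allocation. Pseudocode for the fallback scan (names are
// hypothetical):
//
//   for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd();
//        ++FI)
//     if (MFI.getObjectAllocation(FI) == MMO->getValue())
//       return FI;        // access provably lands in this stack object
//   return std::nullopt;  // unknown frame object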
3588
3589
3592 if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
3593 return std::nullopt;
3594
3596}
3597
3598
3599
3600
3601void AArch64FrameLowering::determineStackHazardSlot(
3604 if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
3606 return;
3607
3608
3611 return;
3612
3614
3615
3616
3617 bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
3618 return AArch64::FPR64RegClass.contains(Reg) ||
3619 AArch64::FPR128RegClass.contains(Reg) ||
3620 AArch64::ZPRRegClass.contains(Reg) ||
3621 AArch64::PPRRegClass.contains(Reg);
3622 });
3623 bool HasFPRStackObjects = false;
3624 if (!HasFPRCSRs) {
3626 for (auto &MBB : MF) {
3629 if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
3632 FrameObjects[*FI] |= 2;
3633 else
3634 FrameObjects[*FI] |= 1;
3635 }
3636 }
3637 }
3638 HasFPRStackObjects =
3639 any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
3640 }
3641
3642 if (HasFPRCSRs || HasFPRStackObjects) {
3644 LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
3645 << StackHazardSize << "\n");
3647 }
3648}
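// --- Illustrative note, not part of the original source ---
// The stack hazard slot is plain padding: on cores where a GPR store and an
// FP/SIMD access to nearby addresses incur a cross-unit forwarding penalty,
// keeping GPR saves and FPR saves StackHazardSize bytes apart sidesteps the
// hazard. The resulting callee-save area layout when the slot is inserted
// (sizes are illustrative):
//
//   [ GPR callee saves ][ hazard padding ][ FPR/SVE callee saves ]
//        x19..x30        StackHazardSize      d8..d15, z8..z23
//
// The slot is only created when FPRs are actually saved or FPR-typed stack
// objects exist (the scan above), since otherwise there is nothing to
// separate.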
3649
3653
3654
3656 return;
3657
3663 unsigned UnspilledCSGPR = AArch64::NoRegister;
3664 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
3665
3668
3669 unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
3671 : (unsigned)AArch64::NoRegister;
3672
3673 unsigned ExtraCSSpill = 0;
3674 bool HasUnpairedGPR64 = false;
3675 bool HasPairZReg = false;
3676
3677 for (unsigned i = 0; CSRegs[i]; ++i) {
3678 const unsigned Reg = CSRegs[i];
3679
3680
3681 if (Reg == BasePointerReg)
3682 SavedRegs.set(Reg);
3683
3684 bool RegUsed = SavedRegs.test(Reg);
3685 unsigned PairedReg = AArch64::NoRegister;
3686 const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
3687 if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
3688 AArch64::FPR128RegClass.contains(Reg)) {
3689
3690
3691 if (HasUnpairedGPR64)
3692 PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
3693 else
3694 PairedReg = CSRegs[i ^ 1];
3695 }
3696
3697
3698
3699
3700
3701 if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
3702 PairedReg = AArch64::NoRegister;
3703 HasUnpairedGPR64 = true;
3704 }
3705 assert(PairedReg == AArch64::NoRegister ||
3706 AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
3707 AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
3708 AArch64::FPR128RegClass.contains(Reg, PairedReg));
3709
3710 if (!RegUsed) {
3711 if (AArch64::GPR64RegClass.contains(Reg) &&
3713 UnspilledCSGPR = Reg;
3714 UnspilledCSGPRPaired = PairedReg;
3715 }
3716 continue;
3717 }
3718
3719
3720
3721
3722 if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
3723 !SavedRegs.test(PairedReg)) {
3724 SavedRegs.set(PairedReg);
3725 if (AArch64::GPR64RegClass.contains(PairedReg) &&
3727 ExtraCSSpill = PairedReg;
3728 }
3729
3730 HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
3731 SavedRegs.test(CSRegs[i ^ 1]));
3732 }
3733
3736
3737
3739 if (PnReg != AArch64::NoRegister)
3741
3745 SavedRegs.set(AArch64::P8);
3747 }
3748
3750 "Predicate cannot be a reserved register");
3751 }
3752
3755
3756
3757
3758
3759
3760 SavedRegs.set(AArch64::X18);
3761 }
3762
3763
3764 unsigned CSStackSize = 0;
3765 unsigned SVECSStackSize = 0;
3767 for (unsigned Reg : SavedRegs.set_bits()) {
3768 auto *RC = TRI->getMinimalPhysRegClass(Reg);
3769 assert(RC && "expected register class!");
3770 auto SpillSize = TRI->getSpillSize(*RC);
3771 if (AArch64::PPRRegClass.contains(Reg) ||
3772 AArch64::ZPRRegClass.contains(Reg))
3773 SVECSStackSize += SpillSize;
3774 else
3775 CSStackSize += SpillSize;
3776 }
3777
3778
3779
3780
3781
3785 if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
3786 CSStackSize += 16;
3787 else
3788 CSStackSize += 8;
3789 }
3790
3791
3792
3793 determineStackHazardSlot(MF, SavedRegs);
3794 if (AFI->hasStackHazardSlotIndex())
3796
3797
3798 unsigned NumSavedRegs = SavedRegs.count();
3799
3800
3802 if (hasFP(MF) ||
3804 SavedRegs.set(AArch64::FP);
3805 SavedRegs.set(AArch64::LR);
3806 }
3807
3809 dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
3810 for (unsigned Reg : SavedRegs.set_bits())
3812 dbgs() << "\n";
3813 });
3814
3815
3816 int64_t SVEStackSize =
3817 alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
3818 bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
3819
3820
3821
3823
3824
3825
3826
3827 int64_t CalleeStackUsed = 0;
3830 if (FixedOff > CalleeStackUsed)
3831 CalleeStackUsed = FixedOff;
3832 }
3833
3834
3835 bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
3836 CalleeStackUsed) > EstimatedStackSizeLimit;
3838 AFI->setHasStackFrame(true);
3839
3840
3841
3842
3843
3844
3845
3846 if (BigStack) {
3847 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
3849 << " to get a scratch register.\n");
3850 SavedRegs.set(UnspilledCSGPR);
3851 ExtraCSSpill = UnspilledCSGPR;
3852
3853
3854
3855
3856 if (producePairRegisters(MF)) {
3857 if (UnspilledCSGPRPaired == AArch64::NoRegister) {
3858
3860 SavedRegs.reset(UnspilledCSGPR);
3861 ExtraCSSpill = AArch64::NoRegister;
3862 }
3863 } else
3864 SavedRegs.set(UnspilledCSGPRPaired);
3865 }
3866 }
3867
3868
3869
3873 unsigned Size = TRI->getSpillSize(RC);
3874 Align Alignment = TRI->getSpillAlign(RC);
3877 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
3878 << " as the emergency spill slot.\n");
3879 }
3880 }
3881
3882
3883 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
3884
3885
3886
3887 if (hasFP(MF) && AFI->hasSwiftAsyncContext())
3888 CSStackSize += 8;
3889
3892 << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
3893
3895 AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
3896 "Should not invalidate callee saved info");
3897
3898
3899
3900 AFI->setCalleeSavedStackSize(AlignedCSStackSize);
3901 AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
3902 AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
3903}
3904
3907 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
3908 unsigned &MaxCSFrameIndex) const {
3911
3912
3913
3914
3915
3916 if (NeedsWinCFI)
3917 std::reverse(CSI.begin(), CSI.end());
3918
3919 if (CSI.empty())
3920 return true;
3921
3922
3923
3926
3931 if ((unsigned)FrameIdx < MinCSFrameIndex)
3932 MinCSFrameIndex = FrameIdx;
3933 if ((unsigned)FrameIdx > MaxCSFrameIndex)
3934 MaxCSFrameIndex = FrameIdx;
3935 }
3936
3937
3939 std::vector<CalleeSavedInfo> VGSaves;
3941
3943 VGInfo.setRestored(false);
3944 VGSaves.push_back(VGInfo);
3945
3946
3947
3948 if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
3949 VGSaves.push_back(VGInfo);
3950
3951 bool InsertBeforeLR = false;
3952
3953 for (unsigned I = 0; I < CSI.size(); I++)
3954 if (CSI[I].getReg() == AArch64::LR) {
3955 InsertBeforeLR = true;
3956 CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
3957 break;
3958 }
3959
3960 if (!InsertBeforeLR)
3961 CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
3962 }
3963
3965 int HazardSlotIndex = std::numeric_limits<int>::max();
3966 for (auto &CS : CSI) {
3969
3970
3974 assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
3975 "Unexpected register order for hazard slot");
3977 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
3978 << "\n");
3980 if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
3981 MinCSFrameIndex = HazardSlotIndex;
3982 if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
3983 MaxCSFrameIndex = HazardSlotIndex;
3984 }
3985
3989 CS.setFrameIdx(FrameIdx);
3990
3991 if ((unsigned)FrameIdx < MinCSFrameIndex)
3992 MinCSFrameIndex = FrameIdx;
3993 if ((unsigned)FrameIdx > MaxCSFrameIndex)
3994 MaxCSFrameIndex = FrameIdx;
3995
3996
3998 Reg == AArch64::FP) {
4001 if ((unsigned)FrameIdx < MinCSFrameIndex)
4002 MinCSFrameIndex = FrameIdx;
4003 if ((unsigned)FrameIdx > MaxCSFrameIndex)
4004 MaxCSFrameIndex = FrameIdx;
4005 }
4006 LastReg = Reg;
4007 }
4008
4009
4011 HazardSlotIndex == std::numeric_limits<int>::max()) {
4013 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
4014 << "\n");
4016 if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
4017 MinCSFrameIndex = HazardSlotIndex;
4018 if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
4019 MaxCSFrameIndex = HazardSlotIndex;
4020 }
4021
4022 return true;
4023}
4024
4028
4029
4030
4031
4033 return false;
4034
4035
4037 return false;
4039}
4040
4041
4043 int &Min, int &Max) {
4044 Min = std::numeric_limits<int>::max();
4045 Max = std::numeric_limits<int>::min();
4046
4048 return false;
4049
4051 for (auto &CS : CSI) {
4052 if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
4053 AArch64::PPRRegClass.contains(CS.getReg())) {
4054 assert((Max == std::numeric_limits<int>::min() ||
4055 Max + 1 == CS.getFrameIdx()) &&
4056 "SVE CalleeSaves are not consecutive");
4057
4058 Min = std::min(Min, CS.getFrameIdx());
4059 Max = std::max(Max, CS.getFrameIdx());
4060 }
4061 }
4062 return Min != std::numeric_limits<int>::max();
4063}
4064
4065
4066
4067
4068
4069
4071 int &MinCSFrameIndex,
4072 int &MaxCSFrameIndex,
4073 bool AssignOffsets) {
4074#ifndef NDEBUG
4075
4078 "SVE vectors should never be passed on the stack by value, only by "
4079 "reference.");
4080#endif
4081
4082 auto Assign = [&MFI](int FI, int64_t Offset) {
4085 };
4086
4088
4089
4091
4092 for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
4095 if (AssignOffsets)
4097 }
4098 }
4099
4100
4102
4103
4105
4106
4107
4108 int StackProtectorFI = -1;
4112 ObjectsToAllocate.push_back(StackProtectorFI);
4113 }
4117 continue;
4118 if (I == StackProtectorFI)
4119 continue;
4120 if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
4121 continue;
4123 continue;
4124
4126 }
4127
4128
4129 for (unsigned FI : ObjectsToAllocate) {
4131
4132
4133
4134 if (Alignment > Align(16))
4136 "Alignment of scalable vectors > 16 bytes is not yet supported");
4137
4139 if (AssignOffsets)
4140 Assign(FI, -Offset);
4141 }
4142
4144}
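// --- Illustrative sketch, not part of the original source ---
// determineSVEStackObjectOffsets() above lays out scalable objects in a
// region measured in vscale-scaled bytes, growing downwards from the base
// of the SVE area; the offsets are later materialized with ADDVL/ADDPL
// style arithmetic rather than fixed immediates. A minimal allocator over
// "scalable bytes" (alignment <= 16, as asserted above):
static int64_t sketchAssignScalable(int64_t CurOffset,
                                    int64_t ObjScalableSize,
                                    int64_t Alignment) {
  int64_t Offset = CurOffset + ObjScalableSize;
  Offset = (Offset + Alignment - 1) / Alignment * Alignment; // align up
  return Offset; // object lives at -Offset * vscale from the SVE base
}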
4145
4146int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
4148 int MinCSFrameIndex, MaxCSFrameIndex;
4150}
4151
4152int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
4153 MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
4155 true);
4156}
4157
4158void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
4159 MachineFunction &MF, RegScavenger *RS) const {
4160 MachineFrameInfo &MFI = MF.getFrameInfo();
4161 
4162 assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
4163 "Upwards growing stack unsupported");
4164
4165 int MinCSFrameIndex, MaxCSFrameIndex;
4166 int64_t SVEStackSize =
4167 assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
4168
4172
4173
4174
4176 return;
4179
4184
4185
4186
4187 int64_t FixedObject =
4190 -FixedObject,
4191 false);
4193
4194
4195
4200 assert(DstReg && "There must be a free register after frame setup");
4206}
4207
4208namespace {
4209struct TagStoreInstr {
4210 MachineInstr *MI;
4211 int64_t Offset, Size;
4212 explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
4213 : MI(MI), Offset(Offset), Size(Size) {}
4214};
4215
4216class TagStoreEdit {
4217 MachineFunction *MF;
4218 MachineBasicBlock *MBB;
4219 MachineRegisterInfo *MRI;
4220 // Tag store instructions that are being replaced.
4221 SmallVector<TagStoreInstr, 8> TagStores;
4222 // Combined memref arguments of the above instructions.
4223 SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
4224 
4225 // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
4226 // FrameRegOffset + Size) with the address tag of SP.
4227 Register FrameReg;
4228 StackOffset FrameRegOffset;
4229 int64_t Size;
4230 // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
4231 // end.
4232 std::optional<int64_t> FrameRegUpdate;
4233 // MIFlags for any FrameReg updating instructions.
4234 unsigned FrameRegUpdateFlags;
4235 
4236 // Whether the stores are zero-initializing.
4237 bool ZeroData;
4238 DebugLoc DL;
4239 
4240 void emitUnrolled(MachineBasicBlock::iterator InsertI);
4241 void emitLoop(MachineBasicBlock::iterator InsertI);
4242 
4243public:
4244 TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
4245 : MBB(MBB), ZeroData(ZeroData) {
4246 MF = MBB->getParent();
4247 MRI = &MF->getRegInfo();
4248 }
4249
4250
4251 void addInstruction(TagStoreInstr I) {
4252 assert((TagStores.empty() ||
4253 TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
4254 "Non-adjacent tag store instructions.");
4255 TagStores.push_back(I);
4256 }
4257 void clear() { TagStores.clear(); }
4258
4259 // Merge the stores into a single instruction sequence and emit it,
4260 // optionally folding a following SP update into the sequence.
4261 void emitCode(MachineBasicBlock::iterator &InsertI,
4262 const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
4263};
4264
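// emitUnrolled lowers the collected tag stores as straight-line code: ST2G
// (or STZ2G) covers 32 bytes per instruction, with a single STG/STZG for a
// 16-byte remainder. If the first store's offset does not fit the scaled
// immediate range [-256*16, 255*16], the address is first materialized into
// a scratch register and offsets restart from zero.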
4265void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
4266 const AArch64InstrInfo *TII =
4267 MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
4268 
4269 const int64_t kMinOffset = -256 * 16;
4270 const int64_t kMaxOffset = 255 * 16;
4271
4272 Register BaseReg = FrameReg;
4273 int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
4274 if (BaseRegOffsetBytes < kMinOffset ||
4275 BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
4276 // BaseReg can be FP, which is not necessarily aligned to 16 bytes. In
4277 // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which
4278 // is required for the offset of ST2G.
4279 BaseRegOffsetBytes % 16 != 0) {
4280 Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
4281 emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
4282 StackOffset::getFixed(BaseRegOffsetBytes), TII);
4283 BaseReg = ScratchReg;
4284 BaseRegOffsetBytes = 0;
4285 }
4286
4287 MachineInstr *LastI = nullptr;
4288 while (Size) {
4289 int64_t InstrSize = (Size > 16) ? 32 : 16;
4290 unsigned Opcode =
4291 InstrSize == 16
4292 ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
4293 : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
4294 assert(BaseRegOffsetBytes % 16 == 0);
4295 MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
4296 .addReg(AArch64::SP)
4297 .addReg(BaseReg)
4298 .addImm(BaseRegOffsetBytes / 16)
4299 .setMemRefs(CombinedMemRefs);
4300 
4301
4302 if (BaseRegOffsetBytes == 0)
4303 LastI = I;
4304 BaseRegOffsetBytes += InstrSize;
4305 Size -= InstrSize;
4306 }
4307
4308 if (LastI)
4309 MBB->splice(InsertI, MBB, LastI);
4310}
4311
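// emitLoop lowers the tag stores as an STGloop/STZGloop pseudo with
// write-back. When an SP update is folded in, trailing bytes that do not fit
// the 32-byte loop step are written by a final post-indexed STG/STZG, and
// any leftover base-register adjustment becomes a plain ADD/SUB immediate.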
4312void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
4313 const AArch64InstrInfo *TII =
4314 MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
4315 
4316 Register BaseReg = FrameRegUpdate
4317 ? FrameReg
4318 : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
4319 Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
4320
4322
4323 int64_t LoopSize = Size;
4324
4325
4326 if (FrameRegUpdate && *FrameRegUpdate)
4327 LoopSize -= LoopSize % 32;
4328 MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
4329 TII->get(ZeroData ? AArch64::STZGloop_wback
4330 : AArch64::STGloop_wback))
4331 .addDef(SizeReg)
4332 .addDef(BaseReg)
4333 .addImm(LoopSize)
4334 .addReg(BaseReg)
4335 .setMemRefs(CombinedMemRefs);
4336 if (FrameRegUpdate)
4337 LoopI->setFlags(FrameRegUpdateFlags);
4338
4339 int64_t ExtraBaseRegUpdate =
4340 FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
4341 LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
4342 << ", Size=" << Size
4343 << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
4344 << ", FrameRegUpdate=" << FrameRegUpdate
4345 << ", FrameRegOffset.getFixed()="
4346 << FrameRegOffset.getFixed() << "\n");
4347 if (LoopSize < Size) {
4348 assert(FrameRegUpdate);
4350
4351 int64_t STGOffset = ExtraBaseRegUpdate + 16;
4352 assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
4353 "STG immediate out of range");
4355 TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
4359 .addImm(STGOffset / 16)
4362 } else if (ExtraBaseRegUpdate) {
4363
4364 int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
4365 assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
4368 TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
4371 .addImm(AddSubOffset)
4374 }
4375}
4376
4377 // Check if *II is a register update that can be merged into STGloop that
4378 // ends at (Reg + Size). RegUpdate must be within [-4096, 4080] range of the
4379 // post-increment base register.
4380bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
4381 int64_t Size, int64_t *TotalOffset) {
4382 MachineInstr &MI = *II;
4383 if ((MI.getOpcode() == AArch64::ADDXri ||
4384 MI.getOpcode() == AArch64::SUBXri) &&
4385 MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
4386 unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
4387 int64_t Offset = MI.getOperand(2).getImm() << Shift;
4388 if (MI.getOpcode() == AArch64::SUBXri)
4389 Offset = -Offset;
4390 int64_t PostOffset = Offset - Size;
4391 
4392
4393
4394
4395
4396
4397
4398
4399 const int64_t kMaxOffset = 4080 - 16;
4400
4401 const int64_t kMinOffset = -4095;
4402 if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
4403 PostOffset % 16 == 0) {
4404 *TotalOffset = Offset;
4405 return true;
4406 }
4407 }
4408 return false;
4409}
4410
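// mergeMemRefs combines the memory operands of all tag stores being merged;
// if any store carries no memoperands it may access anything, so the merged
// list is conservatively left empty.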
4411void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
4412 SmallVectorImpl<MachineMemOperand *> &MemRefs) {
4413 MemRefs.clear();
4414 for (auto &TS : TSE) {
4415 MachineInstr *MI = TS.MI;
4416 // An instruction without memory operands may access anything. Be
4417 // conservative and return an empty list.
4418 if (MI->memoperands_empty()) {
4419 MemRefs.clear();
4420 return;
4421 }
4422 MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
4423 }
4424}
4425
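// emitCode picks the lowering strategy: short runs (below the loop threshold)
// are emitted unrolled, longer ones as a loop, optionally folding a following
// SP increment/decrement into the loop's write-back when CFI allows it.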
4426void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
4427 const AArch64FrameLowering *TFI,
4428 bool TryMergeSPUpdate) {
4429 if (TagStores.empty())
4430 return;
4431 TagStoreInstr &FirstTagStore = TagStores[0];
4432 TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
4433 Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
4434 DL = TagStores[0].MI->getDebugLoc();
4435 
4436 Register Reg;
4437 FrameRegOffset = TFI->resolveFrameOffsetReference(
4438 *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
4439 false /*PreferFP*/, true /*ForSimm*/);
4440 FrameReg = Reg;
4441 FrameRegUpdate = std::nullopt;
4442
4443 mergeMemRefs(TagStores, CombinedMemRefs);
4444
4446 dbgs() << "Replacing adjacent STG instructions:\n";
4447 for (const auto &Instr : TagStores) {
4448 dbgs() << " " << *Instr.MI;
4449 }
4450 });
4451
4452 // Size threshold where a loop becomes shorter than a linear sequence of
4453 // tagging instructions.
4454 
4455 if (Size < kSetTagLoopThreshold) {
4456 if (TagStores.size() < 2)
4457 return;
4458 emitUnrolled(InsertI);
4459 } else {
4460 MachineInstr *UpdateInstr = nullptr;
4461 int64_t TotalOffset = 0;
4462 if (TryMergeSPUpdate) {
4463
4464
4465
4466
4467
4468 if (InsertI != MBB->end() &&
4469 canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
4470 &TotalOffset)) {
4471 UpdateInstr = &*InsertI++;
4472 LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
4473 << *UpdateInstr);
4474 }
4475 }
4476
4477 if (!UpdateInstr && TagStores.size() < 2)
4478 return;
4479
4480 if (UpdateInstr) {
4481 FrameRegUpdate = TotalOffset;
4482 FrameRegUpdateFlags = UpdateInstr->getFlags();
4483 }
4484 emitLoop(InsertI);
4485 if (UpdateInstr)
4486 UpdateInstr->eraseFromParent();
4487 }
4488
4489 for (auto &TS : TagStores)
4490 TS.MI->eraseFromParent();
4491}
4492
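// Predicate for the merge scan below: matches STG/STZG/ST2G/STZ2G with an SP
// base and a frame-index operand, as well as the STGloop/STZGloop pseudos,
// and reports the tagged range's Offset/Size and whether memory is zeroed.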
4493bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
4494 int64_t &Size, bool &ZeroData) {
4495 MachineFunction &MF = *MI.getParent()->getParent();
4496 const MachineFrameInfo &MFI = MF.getFrameInfo();
4497 
4498 unsigned Opcode = MI.getOpcode();
4499 ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
4500 Opcode == AArch64::STZ2Gi);
4501
4502 if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
4503 if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
4504 return false;
4505 if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
4506 return false;
4507 Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
4508 Size = MI.getOperand(2).getImm();
4509 return true;
4510 }
4511
4512 if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
4513 Size = 16;
4514 else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
4515 Size = 32;
4516 else
4517 return false;
4518
4519 if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
4520 return false;
4522 Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
4523 16 * MI.getOperand(2).getImm();
4524 return true;
4525}
4526
4527 // Detect a run of memory tagging instructions for adjacent stack frame
4528 // slots and replace them with a shorter instruction sequence:
4529 //   * replace STG + STG with ST2G
4530 //   * replace STGloop + STGloop with STGloop
4531 //   * replace STGloop + STG with STGloop
4532 //   * replace STG + STGloop with STGloop
4533MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
4534 const AArch64FrameLowering *TFI,
4535 RegScavenger *RS) {
4536 bool FirstZeroData;
4537 int64_t Size, Offset;
4538 MachineInstr &MI = *II;
4539 MachineBasicBlock *MBB = MI.getParent();
4540 MachineBasicBlock::iterator NextI = ++II;
4541 if (MI.getFlag(MachineInstr::FrameDestroy))
4542 return II;
4543 if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
4544 return II;
4545 
4546 SmallVector<TagStoreInstr, 8> Instrs;
4547 Instrs.emplace_back(&MI, Offset, Size);
4548 
4549 constexpr int kScanLimit = 10;
4550 int Count = 0;
4551 for (MachineBasicBlock::iterator E = MBB->end();
4552 NextI != E && Count < kScanLimit; ++NextI) {
4553 MachineInstr &MI = *NextI;
4554 bool ZeroData;
4555 int64_t Size, Offset;
4556 
4557 // Collect instructions that update memory tags with a FrameIndex operand
4558 // and (when applicable) constant size, and whose output registers are
4559 // dead (the latter is almost always the case).
4560 
4561 if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
4562 if (ZeroData != FirstZeroData)
4563 break;
4564 Instrs.emplace_back(&MI, Offset, Size);
4565 continue;
4566 }
4567 
4568 // Only count non-transient, non-tagging instructions toward the scan
4569 // limit.
4570 if (!MI.isTransient())
4571 ++Count;
4572 
4573 // Just in case, stop before the epilogue code starts.
4574 if (MI.getFlag(MachineInstr::FrameSetup) ||
4575 MI.getFlag(MachineInstr::FrameDestroy))
4576 break;
4577 
4578 // Reject anything that may alias the collected instructions.
4579 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() || MI.isCall())
4580 break;
4581 }
4582
4583
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594 LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
4595 LiveRegs.addLiveOuts(*MBB);
4598 if (MI == InsertI)
4599 break;
4600 LiveRegs.stepBackward(*I);
4601 }
4602 InsertI++;
4603 if (LiveRegs.contains(AArch64::NZCV))
4604 return InsertI;
4605
4607 [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
4608 return Left.Offset < Right.Offset;
4609 });
4610
4611 // Make sure that we don't have any overlapping stores.
4612 int64_t CurOffset = Instrs[0].Offset;
4613 for (auto &Instr : Instrs) {
4614 if (CurOffset > Instr.Offset)
4615 return NextI;
4616 CurOffset = Instr.Offset + Instr.Size;
4617 }
4618
4619 // Find contiguous runs of tagged memory and emit a shorter instruction
4620 // sequence for each run.
4621 TagStoreEdit TSE(MBB, FirstZeroData);
4622 std::optional<int64_t> EndOffset;
4623 for (auto &Instr : Instrs) {
4624 if (EndOffset && *EndOffset != Instr.Offset) {
4625
4626 TSE.emitCode(InsertI, TFI, false);
4627 TSE.clear();
4628 }
4629
4630 TSE.addInstruction(Instr);
4631 EndOffset = Instr.Offset + Instr.Size;
4632 }
4633
4635 // Multiple FP/SP updates in a loop can not be described by CFI instructions.
4636 TSE.emitCode(
4637 InsertI, TFI,
4639
4640 return InsertI;
4641}
4642}
4643
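// emitVGSaveRestore expands the VGSavePseudo/VGRestorePseudo markers into
// CFI: a .cfi_offset describing where VG is spilled around a streaming-mode
// change, and a matching .cfi_restore afterwards.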
4644MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
4645 const AArch64FrameLowering *TFI) {
4646 MachineInstr &MI = *II;
4647 MachineBasicBlock *MBB = MI.getParent();
4648 MachineFunction *MF = MBB->getParent();
4649 
4650 if (MI.getOpcode() != AArch64::VGSavePseudo &&
4651 MI.getOpcode() != AArch64::VGRestorePseudo)
4652 return II;
4653
4655 bool LocallyStreaming =
4661
4662 int64_t VGFrameIdx =
4664 assert(VGFrameIdx != std::numeric_limits<int>::max() &&
4665 "Expected FrameIdx for VG");
4666
4667 unsigned CFIIndex;
4668 if (MI.getOpcode() == AArch64::VGSavePseudo) {
4673 nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
4674 } else
4676 nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));
4677
4678 BuildMI(*MBB, II, MI.getDebugLoc(),
4679 TII->get(TargetOpcode::CFI_INSTRUCTION))
4680 .addCFIIndex(CFIIndex);
4681
4682 MI.eraseFromParent();
4684}
4685
4686void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4687 MachineFunction &MF, RegScavenger *RS) const {
4688 for (auto &BB : MF)
4689 for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
4690 if (requiresSaveVG(MF))
4691 II = emitVGSaveRestore(II, this);
4692 if (StackTaggingMergeSetTag)
4693 II = tryMergeAdjacentSTG(II, this, RS);
4694 }
4695}
4696
4697
4698
4699
4700StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
4701 const MachineFunction &MF, int FI, Register &FrameReg,
4702 bool IgnoreSPUpdates) const {
4703 const MachineFrameInfo &MFI = MF.getFrameInfo();
4704 if (IgnoreSPUpdates) {
4705 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
4706 << MFI.getObjectOffset(FI) << "\n");
4707 FrameReg = AArch64::SP;
4708 return StackOffset::getFixed(MFI.getObjectOffset(FI));
4709 }
4710
4711
4716
4717 FrameReg = AArch64::SP;
4719}
4720
4721
4722
4725 return 0;
4726}
4727
4728
4729
4732
4733 unsigned CSSize =
4734 MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
4735 
4736 return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
4737 getStackAlign());
4738}
4739
4740namespace {
4741struct FrameObject {
4742 bool IsValid = false;
4743 // Index of the object in MFI.
4744 int ObjectIndex = 0;
4745 // Group ID this object belongs to.
4746 int GroupIndex = -1;
4747 // This object should be placed first (closest to SP).
4748 bool ObjectFirst = false;
4749 // This object's group (which always contains the object with
4750 // ObjectFirst==true) should be placed first.
4751 bool GroupFirst = false;
4752 
4753 // Used to distinguish between FP and GPR accesses. The values are decided
4754 // so that they sort FPR < Hazard < GPR and they can be or'd together.
4755 unsigned Accesses = 0;
4756 enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
4757};
4758
4759class GroupBuilder {
4760 SmallVector<int, 8> CurrentMembers;
4761 int NextGroupIndex = 0;
4762 std::vector<FrameObject> &Objects;
4763 
4764public:
4765 GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
4766 void AddMember(int Index) { CurrentMembers.push_back(Index); }
4767 void EndCurrentGroup() {
4768 if (CurrentMembers.size() > 1) {
4769 // Create a new group with the current member list. This might remove them
4770 // from their pre-existing groups. That's OK, dealing with overlapping
4771 // groups is too hard and unlikely to make a difference.
4772 LLVM_DEBUG(dbgs() << "group:");
4773 for (int Index : CurrentMembers) {
4774 Objects[Index].GroupIndex = NextGroupIndex;
4775 LLVM_DEBUG(dbgs() << " " << Index);
4776 }
4777 LLVM_DEBUG(dbgs() << "\n");
4778 NextGroupIndex++;
4779 }
4780 CurrentMembers.clear();
4781 }
4782};
4783
4784bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
4785 // Sort objects so that invalid (unused) objects come last, letting
4786 // iteration stop at the first invalid entry; valid objects are keyed by
4787 // access kind (FPR, then hazard padding, then GPR) to keep FP/SME and GPR
4788 // accesses apart, then by the ObjectFirst/GroupFirst flags used for the
4789 // tagged base pointer, and finally by group index and original object
4790 // index, keeping group members adjacent and the order otherwise stable.
4806 return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
4807 A.GroupIndex, A.ObjectIndex) <
4808 std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
4809 B.GroupIndex, B.ObjectIndex);
4810}
4811}
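// orderFrameObjects classifies each stack object by how it is accessed (GPR,
// FPR/SME, hazard padding), groups objects written by the same run of tag
// stores, then stable-sorts ObjectsToAllocate with FrameObjectCompare above.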
4812
4813void AArch64FrameLowering::orderFrameObjects(
4814 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4815 if (!OrderFrameObjects || ObjectsToAllocate.empty())
4816 return;
4817
4818 const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
4819 const MachineFrameInfo &MFI = MF.getFrameInfo();
4820 std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
4821 for (auto &Obj : ObjectsToAllocate) {
4822 FrameObjects[Obj].IsValid = true;
4823 FrameObjects[Obj].ObjectIndex = Obj;
4824 }
4825
4826
4827
4828 GroupBuilder GB(FrameObjects);
4829 for (auto &MBB : MF) {
4830 for (const MachineInstr &MI : MBB) {
4831 if (MI.isDebugInstr())
4832 continue;
4833
4835 std::optional<int> FI = getLdStFrameID(MI, MFI);
4836 if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
4837 if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
4838 AArch64InstrInfo::isFpOrNEON(MI))
4839 FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
4840 else
4841 FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
4842 }
4843 }
4844
4845 int OpIndex;
4846 switch (MI.getOpcode()) {
4847 case AArch64::STGloop:
4848 case AArch64::STZGloop:
4849 OpIndex = 3;
4850 break;
4851 case AArch64::STGi:
4852 case AArch64::STZGi:
4853 case AArch64::ST2Gi:
4854 case AArch64::STZ2Gi:
4855 OpIndex = 1;
4856 break;
4857 default:
4858 OpIndex = -1;
4859 }
4860
4861 int TaggedFI = -1;
4862 if (OpIndex >= 0) {
4863 const MachineOperand &MO = MI.getOperand(OpIndex);
4864 if (MO.isFI()) {
4865 int FI = MO.getIndex();
4866 if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
4867 FrameObjects[FI].IsValid)
4868 TaggedFI = FI;
4869 }
4870 }
4871
4872
4873
4874 if (TaggedFI >= 0)
4875 GB.AddMember(TaggedFI);
4876 else
4877 GB.EndCurrentGroup();
4878 }
4879
4880 GB.EndCurrentGroup();
4881 }
4882
4883 if (AFI.hasStackHazardSlotIndex()) {
4884 FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
4885 FrameObject::AccessHazard;
4886
4887 for (auto &Obj : FrameObjects)
4888 if (!Obj.Accesses ||
4889 Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
4890 Obj.Accesses = FrameObject::AccessGPR;
4891 }
4892
4893
4894
4895
4896
4897 std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
4898 if (TBPI) {
4899 FrameObjects[*TBPI].ObjectFirst = true;
4900 FrameObjects[*TBPI].GroupFirst = true;
4901 int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
4902 if (FirstGroupIndex >= 0)
4903 for (FrameObject &Object : FrameObjects)
4904 if (Object.GroupIndex == FirstGroupIndex)
4905 Object.GroupFirst = true;
4906 }
4907
4908 llvm::stable_sort(FrameObjects, FrameObjectCompare);
4909 
4910 int i = 0;
4911 for (auto &Obj : FrameObjects) {
4912
4913 if (!Obj.IsValid)
4914 break;
4915 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4916 }
4917
4919 dbgs() << "Final frame order:\n";
4920 for (auto &Obj : FrameObjects) {
4921 if (!Obj.IsValid)
4922 break;
4923 dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
4924 if (Obj.ObjectFirst)
4925 dbgs() << ", first";
4926 if (Obj.GroupFirst)
4927 dbgs() << ", group-first";
4928 dbgs() << "\n";
4929 }
4930 });
4931}
4932
4933 // Emit a loop that probes a constant allocation which is an exact multiple
4934 // of ProbeSize: each iteration moves SP down by ProbeSize, stores XZR at
4935 // the new SP to probe the page, and loops until the target SP value is
4936 // reached.
4938AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
4946
4949 MF.insert(MBBInsertPoint, LoopMBB);
4951 MF.insert(MBBInsertPoint, ExitMBB);
4952
4953
4954
4958
4959 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
4960 .addReg(AArch64::XZR)
4961 .addReg(AArch64::SP)
4964
4965 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
4966 AArch64::XZR)
4967 .addReg(AArch64::SP)
4971
4972 BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
4976
4979
4983
4985
4986 return ExitMBB->begin();
4987}
4988
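// inlineStackProbeFixed expands a PROBED_STACKALLOC of statically known size:
// a small number of whole ProbeSize blocks is unrolled (probing each block
// with a store of XZR), larger counts reuse the loop above, and any residual
// allocation is probed separately if it could leave too much unprobed stack.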
4989void AArch64FrameLowering::inlineStackProbeFixed(
4990 MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
4991 StackOffset CFAOffset) const {
4999 
5001 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
5002 int64_t NumBlocks = FrameSize / ProbeSize;
5003 int64_t ResidualSize = FrameSize % ProbeSize;
5004
5005 LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
5006 << NumBlocks << " blocks of " << ProbeSize
5007 << " bytes, plus " << ResidualSize << " bytes\n");
5008
5009
5010
5012 for (int i = 0; i < NumBlocks; ++i) {
5013
5014
5018 EmitAsyncCFI && !HasFP, CFAOffset);
5020
5022 .addReg(AArch64::XZR)
5023 .addReg(AArch64::SP)
5026 }
5027 } else if (NumBlocks != 0) {
5028
5029
5033 EmitAsyncCFI && !HasFP, CFAOffset);
5035 MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
5037 if (EmitAsyncCFI && !HasFP) {
5038
5041 unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
5042 unsigned CFIIndex =
5047 }
5048 }
5049
5050 if (ResidualSize != 0) {
5051
5052
5056 EmitAsyncCFI && !HasFP, CFAOffset);
5058
5060 .addReg(AArch64::XZR)
5061 .addReg(AArch64::SP)
5064 }
5065 }
5066}
5067
5068void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
5070
5071
5072 
5073 SmallVector<MachineInstr *, 4> ToReplace;
5074 for (MachineInstr &MI : MBB)
5075 if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
5076 MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
5077 ToReplace.push_back(&MI);
5078 
5079 for (MachineInstr *MI : ToReplace) {
5080 if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
5081 Register ScratchReg = MI->getOperand(0).getReg();
5082 int64_t FrameSize = MI->getOperand(1).getImm();
5083 StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
5084 MI->getOperand(3).getImm());
5085 inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
5086 CFAOffset);
5087 } else {
5088 assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
5089 "Stack probe pseudo-instruction expected");
5090 const AArch64InstrInfo *TII =
5091 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
5092 Register TargetReg = MI->getOperand(0).getReg();
5093 (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
5094 }
5095 MI->eraseFromParent();
5096 }
5097}
5098
5099struct StackAccess {
5100 enum AccessType {
5101 NotAccessed = 0, // Stack object not accessed by load/store instructions.
5102 GPR = 1 << 0, // A general purpose register.
5103 PPR = 1 << 1, // A predicate register.
5104 FPR = 1 << 2, // A floating point/Neon/SVE register.
5105 };
5106
5107 int Idx;
5108 StackOffset Offset;
5109 int64_t Size;
5110 unsigned AccessTypes;
5111 
5112 StackAccess() : Idx(0), AccessTypes(NotAccessed) {}
5113 
5114 bool operator<(const StackAccess &Rhs) const {
5115 return std::make_tuple(start(), Idx) <
5116 std::make_tuple(Rhs.start(), Rhs.Idx);
5117 }
5118
5119 bool isCPU() const {
5120 // Predicate register load and store instructions execute on the CPU.
5121 return AccessTypes & (AccessType::GPR | AccessType::PPR);
5122 }
5123 bool isSME() const { return AccessTypes & AccessType::FPR; }
5124 bool isMixed() const { return isCPU() && isSME(); }
5126 int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
5127 int64_t end() const { return start() + Size; }
5128
5129 std::string getTypeString() const {
5130 switch (AccessTypes) {
5131 case AccessType::FPR:
5132 return "FPR";
5133 case AccessType::PPR:
5134 return "PPR";
5135 case AccessType::GPR:
5136 return "GPR";
5137 case AccessType::NotAccessed:
5138 return "NA";
5139 default:
5140 return "Mixed";
5141 }
5142 }
5143
5144 void print(raw_ostream &OS) const {
5145 OS << "stack object " << Idx << " [SP"
5146 << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
5147 if (Offset.getScalable())
5148 OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
5149 << " * vscale";
5150 OS << "]";
5151 }
5152};
5154raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
5155 SA.print(OS);
5156 return OS;
5157}
5158
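// emitRemarks reports SME stack hazards when aarch64-stack-hazard-remark-size
// is set: FP/SME-accessed objects that sit within HazardSize bytes of
// GPR-accessed objects, and objects touched by both kinds of instruction.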
5159void AArch64FrameLowering::emitRemarks(
5160 const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
5161 
5162 SMEAttrs Attrs(MF.getFunction());
5163 if (Attrs.hasNonStreamingInterfaceAndBody())
5164 return;
5165
5169
5170 if (HazardSize == 0)
5171 return;
5172
5174
5176 return;
5177
5178 std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
5179
5180 size_t NumFPLdSt = 0;
5181 size_t NumNonFPLdSt = 0;
5182
5183 
5184 for (const MachineBasicBlock &MBB : MF) {
5185 for (const MachineInstr &MI : MBB) {
5186 if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
5187 continue;
5188 MachineMemOperand *MMO = *MI.memoperands_begin();
5189 std::optional<int> FI = getMMOFrameID(MMO, MFI);
5190 if (FI && *FI >= 0) {
5191 int FrameIdx = *FI;
5192 
5193 int ArrIdx = FrameIdx + MFI.getNumFixedObjects();
5194 if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
5195 StackAccesses[ArrIdx].Idx = FrameIdx;
5196 StackAccesses[ArrIdx].Offset =
5198 StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
5199 }
5200 
5201 unsigned RegTy = StackAccess::AccessType::GPR;
5202 if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
5203 if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
5204 RegTy = StackAccess::PPR;
5205 else
5206 RegTy = StackAccess::FPR;
5207 } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
5208 RegTy = StackAccess::FPR;
5209 }
5210
5211 StackAccesses[ArrIdx].AccessTypes |= RegTy;
5212 
5213 if (RegTy == StackAccess::FPR)
5214 ++NumFPLdSt;
5215 else
5216 ++NumNonFPLdSt;
5217 }
5218 }
5219 }
5220 }
5221
5222 if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
5223 return;
5224 
5225 llvm::sort(StackAccesses);
5226 StackAccesses.erase(llvm::remove_if(StackAccesses,
5227 [](const StackAccess &S) {
5228 return S.AccessTypes ==
5229 StackAccess::NotAccessed;
5230 }),
5231 StackAccesses.end());
5232
5233 SmallVector<const StackAccess *> MixedObjects;
5234 SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
5235 
5236 if (StackAccesses.front().isMixed())
5237 MixedObjects.push_back(&StackAccesses.front());
5238
5239 for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
5240 It != End; ++It) {
5241 const auto &First = *It;
5242 const auto &Second = *(It + 1);
5243
5244 if (Second.isMixed())
5245 MixedObjects.push_back(&Second);
5246
5247 if ((First.isSME() && Second.isCPU()) ||
5248 (First.isCPU() && Second.isSME())) {
5249 uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
5250 if (Distance < HazardSize)
5251 HazardPairs.emplace_back(&First, &Second);
5252 }
5253 }
5254 
5255 auto EmitRemark = [&](StringRef Str) {
5256 ORE->emit([&]() {
5257 auto R = MachineOptimizationRemarkAnalysis(
5258 "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
5259 return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
5260 });
5261 };
5262
5263 for (const auto &P : HazardPairs)
5264 EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
5265
5266 for (const auto *Obj : MixedObjects)
5267 EmitRemark(
5268 formatv("{0} accessed by both GP and FP instructions", *Obj).str());
5269}