LLVM: lib/Target/AArch64/AArch64FrameLowering.cpp Source File


#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"
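// Reader orientation (not part of the original source): in outline, this file
// lowers the AArch64 stack frame. It emits function prologues and epilogues,
// spills and restores callee-saved registers (including SVE Z/P registers and
// the VG pseudo-register for streaming-mode changes), lays out the SVE and
// fixed local areas, and produces the matching DWARF CFI and Windows SEH
// unwind opcodes. The cl::opt flags below gate individual features such as
// the red zone, homogeneous prolog/epilog, and multi-vector spill/fill.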

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
                                       cl::desc("sort stack allocations"),
                                       cl::init(true), cl::Hidden);

cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                         cl::init(0), cl::Hidden);

static cl::opt<bool> DisableMultiVectorSpillFill(
    "aarch64-disable-multivector-spill-fill",
    cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
    cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
/// tail call to a function that uses less stack space for arguments) or
/// negative (for a tail call to a function that needs more stack space than
/// us for arguments).
static int64_t getArgumentStackToRestore(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  bool IsTailCallReturn = (MBB.end() != MBBI)
                              ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
                              : false;

  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here.
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // Otherwise the amount to pop is *all* of the argument space, recorded in
    // the MachineFunctionInfo by LowerFormalArguments. This is zero for the C
    // calling convention.
    auto *AFI = MF.getInfo<AArch64FunctionInfo>();
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}
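// Worked example (illustrative, not from the source): under a guaranteed
// tail-call convention, if the caller reserved 32 bytes of incoming argument
// stack and the tail-callee needs 48, the TCRETURN's StackAdjust immediate
// makes ArgumentPopSize negative (-16), i.e. the epilogue must grow rather
// than shrink the argument area; a plain return instead pops the whole
// incoming argument area recorded by LowerFormalArguments.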

bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const {
    return false;
    return false;
    return false;

    return false;

    return false;

    return false;
    return false;

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
    return false;

  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    Register Reg = CSRegs[I];
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
        return false;
      break;
    }
    if (AArch64::GPR64RegClass.contains(Reg))
      ++NumGPRs;
  }

  return true;
}

bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
  return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
}

      if (MI.isDebugInstr() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

        if (!MO.isFI())
          continue;

          return 0;
        }
      }
    }
}

}

static unsigned getFixedObjectSize(const MachineFunction &MF,
                                   const AArch64FunctionInfo *AFI,
                                   bool IsWin64, bool IsFunclet) {
  if (!IsWin64 || IsFunclet) {
  } else {
                                     Attribute::SwiftAsync))
      report_fatal_error("cannot generate ABI-changing tail call for Win64");

    const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
           alignTo(VarArgsArea + UnwindHelpObject, 16);
  }
}

}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;

  const unsigned RedZoneSize =
  if (!RedZoneSize)
    return false;

  bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
                                 !Subtarget.hasSVE();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
}
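// Red-zone note (illustrative, not from the source): a leaf function whose
// locals fit in the area below SP may skip the SP adjustment entirely, e.g.
//   str x0, [sp, #-8]   // store into the red zone, SP untouched
// canUseRedZone() refuses this whenever the function makes calls, needs a
// frame pointer, or its locals exceed the target's red-zone size.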

    return true;

    return true;
    return true;

    return true;

  return false;
}

}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

    int64_t Amount = I->getOperand(0).getImm();
    if (!IsDestroy)
      Amount = -Amount;

    if (CalleePopAmount == 0) {
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

             "non-reserved call frame without var sized objects?");
        inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
      } else {
      }
    }
  } else if (CalleePopAmount != 0) {
    assert(CalleePopAmount < 0xffffff && "call frame too large");
  }
}

void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();

  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);

    if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
      continue;

  }
}

void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    unsigned Reg = Info.getReg();
      continue;

  }
}

static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator InsertPt,
                               unsigned DwarfReg) {
  unsigned CFIIndex =
}

void AArch64FrameLowering::resetCFIToInitialState(MachineBasicBlock &MBB) const {
  const auto &TRI =

  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);

      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));

  if (MFI.shouldSignReturnAddress(MF)) {
    auto CFIInst = MFI.branchProtectionPAuthLR()
  }

  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
                       TRI.getDwarfRegNum(AArch64::X18, true));

  const std::vector<CalleeSavedInfo> &CSI =
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
      continue;
                       TRI.getDwarfRegNum(Reg, true));
  }
}

static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    bool SVE) {
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    if (SVE !=
      continue;

    unsigned Reg = Info.getReg();
    if (SVE &&
      continue;

    if (!Info.isRestored())
      continue;

        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
  }
}

void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, /*SVE=*/false);
}

void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  emitCalleeSavedRestores(MBB, MBBI, /*SVE=*/true);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of a SVE register.
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
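// Example (illustrative): for AllocSize = 32 scalable + 64 fixed bytes,
// upperBound() assumes the maximum scale of 16 bytes per scalable byte and
// yields 32 * 16 + 64 = 576, a safe over-estimate used when deciding whether
// an allocation can stay below the stack-probe size.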

void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {

  if (!AllocSize)
    return;

  const uint64_t AndMask = ~(MaxAlign - 1);

    Register TargetReg = RealignmentPadding
                             : AArch64::SP;

                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {

      assert(!NeedsWinCFI);
    }
    return;
  }

  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    assert(ScratchReg != AArch64::NoRegister);

    if (FollowupAllocs) {
          .addReg(AArch64::XZR)
    }

    return;
  }

  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);

                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
          .addReg(AArch64::XZR)
    }
    return;
  }

  assert(TargetReg != AArch64::NoRegister);

                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
  }

  if (EmitCFI) {
    unsigned Reg =
        Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
    unsigned CFIIndex =
  }
  if (RealignmentPadding)
}

  switch (Reg.id()) {
  default:
    return 0;

#define CASE(n)                                                                \
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \
    return AArch64::X##n
#undef CASE

#define CASE(n)                                                                \
  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
#undef CASE
  }
}

void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const {
    DL = MBBI->getDebugLoc();

    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(XReg);
      FPRsToZero.set(XReg);
    }
  }

  for (MCRegister Reg : GPRsToZero.set_bits())

  for (MCRegister Reg : FPRsToZero.set_bits())

  if (HasSVE) {
    for (MCRegister PReg :
         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
          AArch64::P15}) {
      if (RegsToZero[PReg])
    }
  }
}

  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);
}

    return AArch64::X9;

    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
      return Reg;
  }
  return AArch64::NoRegister;
}

  if (!LiveRegs.available(MRI, AArch64::X16) ||
    return false;
  }

    return false;

  if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
    return true;

}

static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      uint64_t StackSizeInBytes) {
  return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
         StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
}

         F.needsUnwindTableEntry();
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, uint64_t StackBumpBytes) const {
  if (homogeneousPrologEpilog(MF))
    return false;

    return false;

    return false;

    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->hasStackRealignment(MF))
    return false;

    return false;

    return false;

  return true;
}
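// When this returns true the prologue folds the local-area allocation into
// the callee-save store, e.g. (illustrative):
//   stp x29, x30, [sp, #-32]!   // save FP/LR and allocate 16 local bytes
// instead of a separate "sub sp, sp, #16" plus "stp x29, x30, [sp, #-16]!".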

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
    MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
    return false;
    return true;

  while (LastI != Begin) {
    --LastI;
    if (LastI->isTransient())
      continue;
      break;
  }
  switch (LastI->getOpcode()) {
  case AArch64::STGloop:
  case AArch64::STZGloop:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return false;
  default:
    return true;
  }
}

static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             MachineInstr::MIFlag Flag) {
  unsigned Opc = MBBI->getOpcode();
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();

  switch (Opc) {
  default:
  case AArch64::LDPDpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
    break;
  }
  case AArch64::LDPXpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPXpre: {
    Register Reg0 = MBBI->getOperand(1).getReg();
    Register Reg1 = MBBI->getOperand(2).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
    break;
  }
  case AArch64::LDRDpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
    break;
  }
  case AArch64::LDRXpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
    break;
  }
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
    break;
  }
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    Register Reg0 = MBBI->getOperand(0).getReg();
    Register Reg1 = MBBI->getOperand(1).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
    break;
  }
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
    break;
  }
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
    break;
  }
  case AArch64::STPQi:
  case AArch64::LDPQi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
    break;
  }
  case AArch64::LDPQpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPQpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
    break;
  }
  }
  return I;
}

static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
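// The SEH fixup above mirrors what fixupCalleeSaveRestoreStackOffset does to
// the load/store itself: once the local-area allocation is folded into the
// callee-save bump, every SP-relative save offset grows by LocalStackSize,
// and the Windows unwind opcode must report the same adjusted offset or the
// unwinder would restore from the wrong slot.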

}

    return false;
  if (ST.isTargetDarwin())
    return ST.hasSVE();
  return true;
}

static bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
      Opc == AArch64::UBFMXri)
    return true;

    if (Opc == AArch64::ORRXrr)
      return true;

    if (Opc == AArch64::BL) {
      auto Op1 = MBBI->getOperand(0);
      return Op1.isSymbol() &&
             (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
    }
  }

  return false;
}

static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
    int CFAOffset = 0) {
  unsigned NewOpc;

  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }

  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
      SEH->eraseFromParent();
  }

  int64_t MinOffset, MaxOffset;
      NewOpc, Scale, Width, MinOffset, MaxOffset);

  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {

                    false, false, nullptr, EmitCFI,

    return std::prev(MBBI);
  }

  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  if (NeedsWinCFI) {
    *HasWinCFI = true;
  }

  if (EmitCFI) {
  }

}
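// Illustrative conversion performed above: a first callee-save store
//   stp x29, x30, [sp, #0]        (STPXi)
// becomes the pre-incrementing form
//   stp x29, x30, [sp, #-16]!     (STPXpre)
// so the SP bump and the save execute as one instruction; epilogue loads are
// rewritten to the post-incrementing LDP/LDR forms symmetrically. If the
// combined offset would not fit the addressing mode, the code falls back to a
// separate SP adjustment.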

static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              uint64_t LocalStackSize,
                                              bool NeedsWinCFI,
                                              bool *HasWinCFI) {
    return;

  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");

  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    *HasWinCFI = true;
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
           "Expecting a SEH instruction");
  }
}

}

}

static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::PTRUE_C_B:
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:
  }
}

static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
                                        MachineFunction &MF,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, bool NeedsWinCFI,
                                        bool NeedsUnwindInfo) {
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)

  if (NeedsWinCFI)

  if (NeedsUnwindInfo) {
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
  }
}

      .addReg(AArch64::X18)

    unsigned CFIIndex =
  }
}

static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL, unsigned FixedObject) {
  const int OffsetToFirstCalleeSaveFromFP =
  unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
      nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
}

#ifndef NDEBUG
}
#endif

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;

#ifndef NDEBUG
  while (NonFrameStart != End &&
    ++NonFrameStart;

  if (NonFrameStart != MBB.end()) {
    LiveRegs.removeReg(AArch64::SP);
    LiveRegs.removeReg(AArch64::X19);
    LiveRegs.removeReg(AArch64::FP);
    LiveRegs.removeReg(AArch64::LR);

      LiveRegs.removeReg(AArch64::X0);
  }

    if (NonFrameStart == MBB.end())
      return;

    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");
  }
  });
#endif

  if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
                              MFnI.needsDwarfUnwindInfo(MF));

  if (MFnI.shouldSignReturnAddress(MF)) {
    if (NeedsWinCFI)
      HasWinCFI = true;
  }

  if (EmitCFI && MFnI.isMTETagged()) {
  }

    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
        .addUse(AArch64::FP)
        .addUse(AArch64::X16)
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    break;
  }
    [[fallthrough]];

        .addUse(AArch64::FP)
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    break;

    break;
  }

  if (IsFunclet)
    return;

  if (TBPI)
  else

  int64_t NumBytes =
    assert(!HasFP && "unexpected function without stack frame but with FP");
    assert(!SVEStackSize &&
           "unexpected function without stack frame but with SVE objects");

    if (!NumBytes)
      return;

      ++NumRedZoneFunctions;
    } else {
      if (EmitCFI) {
      }
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
    }

    return;
  }

  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
                    EmitAsyncCFI);
    NumBytes = 0;
  } else if (HomPrologEpilog) {
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
        EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (CombineSPBump &&
                                        NeedsWinCFI, &HasWinCFI);
  }

  if (!IsFunclet && HasFP) {
    if (CombineSPBump)

    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
  }

  if (HomPrologEpilog) {
    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
  } else {
    if (NeedsWinCFI && HasWinCFI) {
      NeedsWinCFI = false;
    }
  }
  if (EmitAsyncCFI)
  }

  if (EmitAsyncCFI)
    emitCalleeSavedGPRLocations(MBB, MBBI);

  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
  const int64_t RealignmentPadding =
          : 0;

  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
  if (NeedsWinCFI) {
    HasWinCFI = true;

    if (NumBytes >= (1 << 28))
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;
        .addImm(LowNumWords)
    if ((NumWords & 0xFFFF0000) != 0) {
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16)
    }
  } else {
  }

    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    break;
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    break;
  }

    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    NumBytes = 0;

    if (RealignmentPadding > 0) {
      if (RealignmentPadding >= 4096) {
            .addImm(RealignmentPadding)
            .addReg(AArch64::SP)
      } else {
            .addReg(AArch64::SP)
            .addImm(RealignmentPadding)
      }

      assert(!NeedsWinCFI);
    }
  }

  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;

    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
                      << "\n");

    CalleeSavesBegin = MBBI;
    CalleeSavesEnd = MBBI;

    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
  }

  allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                     nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
  CFAOffset += SVECalleeSavesSize;

  if (EmitAsyncCFI)
    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

         "Cannot use redzone with stack realignment");

    allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
  }

  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
                    false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
  }

  if (NeedsWinCFI && HasWinCFI) {
  }

  if (IsFunclet && F.hasPersonalityFn()) {
        .addReg(AArch64::X1)
    }
  }

  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
    } else {
          *RegInfo, AArch64::SP, AArch64::SP, TotalSize,
          false));
    }
    emitCalleeSavedGPRLocations(MBB, MBBI);
    emitCalleeSavedSVELocations(MBB, MBBI);
  }
}
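// Prologue shape produced above, in outline (illustrative):
//   1. shadow-call-stack / pointer-auth / MTE setup, if enabled;
//   2. callee-save stores, possibly pre-decrementing SP (PrologueSaveSize);
//   3. frame-pointer setup when hasFP(MF), with the Swift async context slot;
//   4. allocation of the SVE callee-save and local areas, then the fixed
//      local area, stack-probing and realigning SP when required;
//   5. base-pointer setup and the CFI/SEH directives for everything above.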

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  bool HasWinCFI = false;
  bool IsFunclet = false;

    DL = MBBI->getDebugLoc();
  }

    BuildMI(MBB, MBB.getFirstTerminator(), DL,
            TII->get(AArch64::PAUTH_EPILOGUE))
        .setMIFlag(MachineInstr::FrameDestroy);
    if (NeedsWinCFI)
      HasWinCFI = true;
  }
    if (EmitCFI)
    if (HasWinCFI) {
              TII->get(AArch64::SEH_EpilogEnd))
    }
    if (NeedsWinCFI) {
      if (!HasWinCFI)
    }
  });

    return;

  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  int64_t AfterCSRPopSize = ArgumentStackToRestore;

  if (homogeneousPrologEpilog(MF, &MBB)) {
    assert(!NeedsWinCFI);
    if (LastPopI != MBB.begin()) {
      auto HomogeneousEpilog = std::prev(LastPopI);
      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
        LastPopI = HomogeneousEpilog;
    }

    assert(AfterCSRPopSize == 0);
    return;
  }
  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
      Pop = std::prev(Pop);

    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);

    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
    } else {
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;
    }
  }

  while (LastPopI != Begin) {
    --LastPopI;
      ++LastPopI;
      break;
    } else if (CombineSPBump)
                                        NeedsWinCFI, &HasWinCFI);
  }

  if (NeedsWinCFI) {
    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
    EpilogStartI = LastPopI;
    --EpilogStartI;
  }

    [[fallthrough]];

                AArch64::FP)
        .addUse(AArch64::FP)
    if (NeedsWinCFI) {
      HasWinCFI = true;
    }
    break;

    break;
  }

  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    if (EmitCFI && hasFP(MF)) {
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
      unsigned CFIIndex =
      BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    }

    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&
      --RestoreBegin;

           IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");

    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;
  }

  if (SVEStackSize) {

    }
  } else {

        MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
        false, false, nullptr, EmitCFI && hasFP(MF),
    NumBytes = 0;
  }

                  false, nullptr, EmitCFI && hasFP(MF),
                  SVEStackSize +

                  false, nullptr, EmitCFI && hasFP(MF),
                  DeallocateAfter +
  }
  if (EmitCFI)
    emitCalleeSavedSVERestores(MBB, RestoreEnd);
  }

  if (!hasFP(MF)) {
    if (RedZone && AfterCSRPopSize == 0)
      return;

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

        MBB, LastPopI, DL, AArch64::SP, AArch64::SP,

    if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
      return;
    }

    NumBytes = 0;
  }

        MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
  } else if (NumBytes)

  if (EmitCFI && hasFP(MF)) {
    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
    BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
  }

  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");

                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
  }
}
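// Epilogue shape produced above, in outline (illustrative): the inverse of
// the prologue — fold the first callee-save pop into the SP bump when
// possible, deallocate the SVE locals and callee-save areas around the SVE
// restores, then either restore SP from FP (frame-pointer functions) or add
// the remaining NumBytes back to SP, and finally pop any tail-call argument
// area (AfterCSRPopSize), emitting matching CFI/SEH throughout.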

}

      MF, FI, FrameReg,
      false);
}

StackOffset
AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
                                                     int FI) const {
  if (MFI.isVariableSizedObjectIndex(FI)) {
  }

  if (!SVEStackSize)

                            ObjectOffset);
  }

  bool IsFixed = MFI.isFixedObjectIndex(FI);
  bool IsCSR =

  if (!IsFixed && !IsCSR)
    ScalableOffset = -SVEStackSize;

}

                                  int FI) const {
}

static StackOffset getFPOffset(const MachineFunction &MF,
                               int64_t ObjectOffset) {
  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject =
  int64_t FPAdjust =
}

static StackOffset getStackOffset(const MachineFunction &MF,
                                  int64_t ObjectOffset) {
}

                             int FI) const {
  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
}

StackOffset AArch64FrameLowering::resolveFrameIndexReference(
    const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
    bool ForSimm) const {
  bool isFixed = MFI.isFixedObjectIndex(FI);
                                   PreferFP, ForSimm);
}

StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {
  bool isCSR =

  bool UseFP = false;

  PreferFP &= !SVEStackSize;

  if (isFixed) {
    UseFP = hasFP(MF);
  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
    UseFP = true;
  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset && !SVEStackSize;

    if (FPOffset >= 0) {
      UseFP = true;
    } else if (MFI.hasVarSizedObjects()) {
      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP)
        UseFP = PreferFP;
      else if (!CanUseBP)
        UseFP = true;

    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
      (void)Subtarget;
             "Funclets should only be present on Win64");
      UseFP = true;
    } else {
      if (FPOffsetFits && PreferFP)
        UseFP = true;
    }
  }

         ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

  if (isSVE) {
        SVEStackSize +
        ObjectOffset);

        RegInfo->hasStackRealignment(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }

    FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
    return SPOffset;
  }

  if (UseFP && !(isFixed || isCSR))
    ScalableOffset = -SVEStackSize;
  if (!UseFP && (isFixed || isCSR))
    ScalableOffset = SVEStackSize;

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
  }

  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
  }

}
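// Register choice above, in outline: fixed (argument) objects and CSRs prefer
// FP; re-aligned or variable-size frames force FP or the base pointer because
// SP-relative offsets are no longer static; otherwise SP is used so FP can be
// eliminated. E.g. with dynamic realignment, a local's distance from the
// realigned SP is not a compile-time FP offset, so non-argument/CSR objects
// must be addressed via SP or the base pointer (see the assert above).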

}

          Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
}

static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI, bool IsFirst,
                                             const TargetRegisterInfo *TRI) {
  if (Reg2 == AArch64::FP)
    return true;
  if (!NeedsWinCFI)
    return false;
  if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
    return false;

  // If pairing a GPR with LR, the pair can be described by the save_lrpair
  // opcode; that only works if LR is not the first register of the pair.
  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
    return false;
  return true;
}

static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst,
                                      const TargetRegisterInfo *TRI) {
  if (UsesWinAAPCS)
    return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
                                            TRI);

  // If we need to store the frame record, don't pair any register
  // with LR other than FP.
  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;

  return false;
}

namespace {

struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  int FrameIdx;
  int Offset;
  enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
  const TargetRegisterClass *RC;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  bool isScalable() const { return Type == PPR || Type == ZPR; }
};

} // namespace

static unsigned findFreePredicateReg(BitVector &SavedRegs) {
  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
    if (SavedRegs.test(PReg)) {
      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
      return PNReg;
    }
  }
  return AArch64::NoRegister;
}

    return false;

  bool IsLocallyStreaming =

  return Subtarget.hasSVE2p1() ||
         (Subtarget.hasSME2() &&
          (!IsLocallyStreaming && Subtarget.isStreaming()));
}

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
    bool NeedsFrameRecord) {
  if (CSI.empty())
    return;

  unsigned Count = CSI.size();
  (void)CC;

          "Odd number of callee-saved regs to spill!");
  int StackFillDir = -1;
  int RegInc = 1;
  unsigned FirstReg = 0;
  if (NeedsWinCFI) {
    ByteOffset = 0;
    StackFillDir = 1;
    RegInc = -1;
    FirstReg = Count - 1;
  }

  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    if (AArch64::GPR64RegClass.contains(RPI.Reg1)) {
      RPI.Type = RegPairInfo::GPR;
      RPI.RC = &AArch64::GPR64RegClass;
    } else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) {
      RPI.Type = RegPairInfo::FPR64;
      RPI.RC = &AArch64::FPR64RegClass;
    } else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) {
      RPI.Type = RegPairInfo::FPR128;
      RPI.RC = &AArch64::FPR128RegClass;
    } else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) {
      RPI.Type = RegPairInfo::ZPR;
      RPI.RC = &AArch64::ZPRRegClass;
    } else if (AArch64::PPRRegClass.contains(RPI.Reg1)) {
      RPI.Type = RegPairInfo::PPR;
      RPI.RC = &AArch64::PPRRegClass;
    } else if (RPI.Reg1 == AArch64::VG) {
      RPI.Type = RegPairInfo::VG;
      RPI.RC = &AArch64::FIXED_REGSRegClass;
    } else {
    }

      ByteOffset += StackFillDir * StackHazardSize;
    LastReg = RPI.Reg1;

    int Scale = TRI->getSpillSize(*RPI.RC);

      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;
      switch (RPI.Type) {
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
                                              IsFirst, TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::PPR:
        break;
      case RegPairInfo::ZPR:
            ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
          int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
            RPI.Reg2 = NextReg;
        }
        break;
      case RegPairInfo::VG:
        break;
      }
    }

    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();
    if (NeedsWinCFI &&
        RPI.isPaired())
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);

    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
    else
      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      ByteOffset += StackFillDir * 8;

    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
        ByteOffset % 16 != 0) {
      ByteOffset += 8 * StackFillDir;

      NeedGapToAlignStack = false;
    }

    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);

    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
    RPI.Offset = Offset / Scale;

    assert((!RPI.isPaired() ||
            (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");

    auto isFrameRecord = [&] {
      if (RPI.isPaired())
        return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
                         : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;

      return i > 0 && RPI.Reg1 == AArch64::FP &&
             CSI[i - 1].getReg() == AArch64::LR;
    };

    if (NeedsFrameRecord && isFrameRecord())

    if (RPI.isPaired())
      i += RegInc;
  }
  if (NeedsWinCFI) {

    std::reverse(RegPairs.begin(), RegPairs.end());
  }
}
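// Pairing example (illustrative, not from the source): saved registers
// {x19, x20, x21, lr, fp, d8, d9} become the pairs (x19,x20), (x21 + an
// 8-byte alignment gap), (lr,fp), and (d8,d9), each assigned adjacent slots
// so STP/LDP with a scaled immediate (-64..63 for GPR/FPR pairs, -256..255
// for scalable ZPR pairs) can save and restore them. On Windows the fill
// direction and pair order are reversed, and the frame record must remain a
// consecutive (FP,LR) pair.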

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MRI.freezeReservedRegs();

  if (homogeneousPrologEpilog(MF)) {
    for (auto &RPI : RegPairs) {
      MIB.addReg(RPI.Reg1);
      MIB.addReg(RPI.Reg2);

      if (MRI.isReserved(RPI.Reg1))
      if (RPI.isPaired() && MRI.isReserved(RPI.Reg2))
    }
    return true;
  }
  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    unsigned Size = TRI->getSpillSize(*RPI.RC);
    Align Alignment = TRI->getSpillAlign(*RPI.RC);
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      break;
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      break;
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      break;
    case RegPairInfo::ZPR:
      StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
      break;
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      break;
    case RegPairInfo::VG:
      StrOpc = AArch64::STRXui;
      break;
    }

    unsigned X0Scratch = AArch64::NoRegister;
    if (Reg1 == AArch64::VG) {
      assert(Reg1 != AArch64::NoRegister);

      if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&

        AFI->setVGIdx(RPI.FrameIdx);
      } else {
            return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
                AArch64::X0, LiveIn.PhysReg);
          }))
        X0Scratch = Reg1;

        if (X0Scratch != AArch64::NoRegister)
              .addReg(AArch64::XZR)

        const uint32_t *RegMask = TRI->getCallPreservedMask(
            MF,
        Reg1 = AArch64::X0;
        AFI->setVGIdx(RPI.FrameIdx);
      }
    }

    LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");

    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }

    if (RPI.isPaired() && RPI.isScalable()) {
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      auto IsPPR = [](const RegPairInfo &c) {
        return c.Reg1 == RegPairInfo::PPR;
      };
      auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
      auto IsZPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::ZPR;
      };
      auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
      }
      if (MRI.isReserved(Reg1))
      if (MRI.isReserved(Reg2))
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset / 2)

      if (NeedsWinCFI)
    } else {
      if (MRI.isReserved(Reg1))
      if (RPI.isPaired()) {
        if (MRI.isReserved(Reg2))
      }
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset)

      if (NeedsWinCFI)
    }

    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
      if (RPI.isPaired())
    }

    if (X0Scratch != AArch64::NoRegister)
          .addReg(AArch64::XZR)
  }
  return true;
}

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
    DL = MBBI->getDebugLoc();

  if (homogeneousPrologEpilog(MF, &MBB)) {
    for (auto &RPI : RegPairs) {
    }
    return true;
  }

  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
  auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
  std::reverse(PPRBegin, PPREnd);
  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
  auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
  std::reverse(ZPRBegin, ZPREnd);

  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : RegPairs) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    unsigned LdrOpc;
    unsigned Size = TRI->getSpillSize(*RPI.RC);
    Align Alignment = TRI->getSpillAlign(*RPI.RC);
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      break;
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      break;
    case RegPairInfo::VG:
      continue;
    }
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }

    if (RPI.isPaired() && RPI.isScalable()) {
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
      }
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset / 2)

      if (NeedsWinCFI)
    } else {
      if (RPI.isPaired()) {
      }
      MIB.addReg(AArch64::SP)
          .addImm(RPI.Offset)

      if (NeedsWinCFI)
    }
  }
  return true;
}

static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
                                        const MachineFrameInfo &MFI) {
  auto *PSV =
      dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
  if (PSV)
    return std::optional<int>(PSV->getFrameIndex());

         FI++)
        return FI;
    }
  }

  return std::nullopt;
}

static std::optional<int> getLdStFrameID(const MachineInstr &MI,
                                         const MachineFrameInfo &MFI) {
  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
    return std::nullopt;

  return getMMOFrameID(*MI.memoperands_begin(), MFI);
}

void AArch64FrameLowering::determineStackHazardSlot(
    MachineFunction &MF, BitVector &SavedRegs) const {
  if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
    return;

    return;

  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg) ||
           AArch64::PPRRegClass.contains(Reg);
  });
  bool HasFPRStackObjects = false;
  if (!HasFPRCSRs) {
    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
            FrameObjects[*FI] |= 2;
          else
            FrameObjects[*FI] |= 1;
        }
      }
    }
    HasFPRStackObjects =
        any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
  }

  if (HasFPRCSRs || HasFPRStackObjects) {
    LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
                      << StackHazardSize << "\n");
  }
}

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
    return;

  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                : (unsigned)AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;
  bool HasUnpairedGPR64 = false;
  bool HasPairZReg = false;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg)) {
      if (HasUnpairedGPR64)
        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
      else
        PairedReg = CSRegs[i ^ 1];
    }

    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
      PairedReg = AArch64::NoRegister;
      HasUnpairedGPR64 = true;
    }
    assert(PairedReg == AArch64::NoRegister ||
           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR128RegClass.contains(Reg, PairedReg));

    if (!RegUsed) {
      if (AArch64::GPR64RegClass.contains(Reg) &&
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
        ExtraCSSpill = PairedReg;
    }

    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
                    SavedRegs.test(CSRegs[i ^ 1]));
  }

    if (PnReg != AArch64::NoRegister)

      SavedRegs.set(AArch64::P8);
    }

           "Predicate cannot be a reserved register");
  }

    SavedRegs.set(AArch64::X18);
  }

  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;
  for (unsigned Reg : SavedRegs.set_bits()) {
    auto *RC = TRI->getMinimalPhysRegClass(Reg);
    assert(RC && "expected register class!");
    auto SpillSize = TRI->getSpillSize(*RC);
    if (AArch64::PPRRegClass.contains(Reg) ||
        AArch64::ZPRRegClass.contains(Reg))
      SVECSStackSize += SpillSize;
    else
      CSStackSize += SpillSize;
  }

    if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
      CSStackSize += 16;
    else
      CSStackSize += 8;
  }

  determineStackHazardSlot(MF, SavedRegs);
  if (AFI->hasStackHazardSlotIndex())

  unsigned NumSavedRegs = SavedRegs.count();

  if (hasFP(MF) ||
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  LLVM_DEBUG({
    dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
    for (unsigned Reg : SavedRegs.set_bits())
    dbgs() << "\n";
  });

  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  int64_t CalleeStackUsed = 0;
    if (FixedOff > CalleeStackUsed)
      CalleeStackUsed = FixedOff;
  }

  bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
                                   CalleeStackUsed) > EstimatedStackSizeLimit;
    AFI->setHasStackFrame(true);

  if (BigStack) {
    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      ExtraCSSpill = UnspilledCSGPR;

      if (producePairRegisters(MF)) {
        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
          SavedRegs.reset(UnspilledCSGPR);
          ExtraCSSpill = AArch64::NoRegister;
        }
      } else
        SavedRegs.set(UnspilledCSGPRPaired);
    }
  }

      unsigned Size = TRI->getSpillSize(RC);
      Align Alignment = TRI->getSpillAlign(RC);
      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");
    }
  }

  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);

  if (hasFP(MF) && AFI->hasSwiftAsyncContext())
    CSStackSize += 8;

                    << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");

          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");

  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
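// Scratch-register note (illustrative): for "big" frames (estimated size past
// the addressing-mode limit, or any SVE stack), the code above either spills
// one extra unspilled callee-saved GPR or creates an emergency spill slot so
// the register scavenger can materialize large offsets, e.g. building
// "sp + #0x12345" in a scratch x-register before a frame-index load/store.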

3904

bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {
  bool NeedsWinCFI = needsWinCFI(MF);
  unsigned StackHazardSize = getStackHazardSize(MF);
  // To match the canonical windows frame layout, reverse the list of
  // callee saved registers to get them laid out by PrologEpilogInserter
  // in the right order. (PEI allocates stack objects top down. By reversing
  // the list we get the layout bottom up.)
  if (NeedsWinCFI)
    std::reverse(CSI.begin(), CSI.end());

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Now that we know which registers need to be saved and restored, allocate
  // stack slots for them.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *AFI = MF.getInfo<AArch64FunctionInfo>();

  bool UsesWinAAPCS = isTargetWindows(MF);
  if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
    int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
    AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;
  }

  // Insert VG into the list of CSRs, immediately before LR if saved.
  if (requiresSaveVG(MF)) {
    std::vector<CalleeSavedInfo> VGSaves;
    SMEAttrs Attrs(MF.getFunction());

    auto VGInfo = CalleeSavedInfo(AArch64::VG);
    VGInfo.setRestored(false);
    VGSaves.push_back(VGInfo);

    // Add VG again if the function is locally-streaming, as streaming and
    // non-streaming VG are saved separately.
    if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
      VGSaves.push_back(VGInfo);

    bool InsertBeforeLR = false;

    for (unsigned I = 0; I < CSI.size(); I++)
      if (CSI[I].getReg() == AArch64::LR) {
        InsertBeforeLR = true;
        CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
        break;
      }

    if (!InsertBeforeLR)
      CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
  }

  Register LastReg = 0;
  int HazardSlotIndex = std::numeric_limits<int>::max();
  for (auto &CS : CSI) {
    Register Reg = CS.getReg();
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    // Create a hazard slot as we switch between GPR and FPR CSRs.
    if (AFI->hasStackHazardSlotIndex() &&
        (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
        AArch64InstrInfo::isFpOrNEON(Reg)) {
      assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
             "Unexpected register order for hazard slot");
      HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
      LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                        << "\n");
      AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
      if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
        MinCSFrameIndex = HazardSlotIndex;
      if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
        MaxCSFrameIndex = HazardSlotIndex;
    }

    unsigned Size = TRI->getSpillSize(*RC);
    Align Alignment(TRI->getSpillAlign(*RC));
    int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
    CS.setFrameIdx(FrameIdx);

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    // Grab 8 bytes below FP for the extended asynchronous frame info.
    if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
        Reg == AArch64::FP) {
      FrameIdx = MFI.CreateStackObject(8, Alignment, true);
      AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;
    }
    LastReg = Reg;
  }

  // Add hazard slot in the case where no FPR CSRs are present.
  if (AFI->hasStackHazardSlotIndex() &&
      HazardSlotIndex == std::numeric_limits<int>::max()) {
    HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                      << "\n");
    AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;
  }

  return true;
}

bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // If the function has streaming-mode changes, don't scavenge a
  // spillslot in the callee-save area, as that might require an
  // 'addvl' in the streaming-mode-changing call-sequence when the
  // function doesn't use a FP.
  if (AFI->hasStreamingModeChanges() && !hasFP(MF))
    return false;
  // Don't allow register salvaging with hazard slots, in case it moves objects
  // into the wrong place.
  if (AFI->hasStackHazardSlotIndex())
    return false;
  return AFI->hasCalleeSaveStackFreeSpace();
}

/// returns true if there are any SVE callee saves.
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
                                      int &Min, int &Max) {
  Min = std::numeric_limits<int>::max();
  Max = std::numeric_limits<int>::min();

  if (!MFI.isCalleeSavedInfoValid())
    return false;

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {
      assert((Max == std::numeric_limits<int>::min() ||
              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");

      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
    }
  }
  return Min != std::numeric_limits<int>::max();
}
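// Illustrative sketch (not part of the original source): how a caller is
// expected to consume the [Min, Max] frame-index range returned above. The
// indices are contiguous by construction (see the assert), so the SVE
// callee-save slots can be walked with a simple loop.
static void exampleWalkSVECalleeSaves(const MachineFrameInfo &MFI) {
  int Min, Max;
  if (!getSVECalleeSaveSlotRange(MFI, Min, Max))
    return; // no SVE callee saves in this function
  for (int FI = Min; FI <= Max; ++FI)
    (void)MFI.getObjectSize(FI); // each slot holds one ZPR/PPR spill
}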

// Process all the SVE stack objects and determine offsets for each object. If
// AssignOffsets is true, the offsets get assigned. Fills in the first and last
// callee-saved frame indices into Min/MaxCSFrameIndex, respectively. Returns
// the size of the stack.
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
                                              int &MinCSFrameIndex,
                                              int &MaxCSFrameIndex,
                                              bool AssignOffsets) {
#ifndef NDEBUG
  // First process all fixed stack objects.
  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
    assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
           "SVE vectors should never be passed on the stack by value, only by "
           "reference.");
#endif

  auto Assign = [&MFI](int FI, int64_t Offset) {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
    MFI.setObjectOffset(FI, Offset);
  };

  int64_t Offset = 0;

  // Then process all callee saved slots.
  if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
    // Assign offsets to the callee save slots.
    for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
      Offset += MFI.getObjectSize(I);
      Offset = alignTo(Offset, MFI.getObjectAlign(I));
      if (AssignOffsets)
        Assign(I, -Offset);
    }
  }

  // Ensure that the Callee-save area is aligned to 16 bytes.
  Offset = alignTo(Offset, Align(16));

  // Create a buffer of SVE objects to allocate, and sort the stack protector
  // to the front if it needs to go into the SVE area.
  SmallVector<int, 8> ObjectsToAllocate;
  int StackProtectorFI = -1;
  if (MFI.hasStackProtectorIndex()) {
    StackProtectorFI = MFI.getStackProtectorIndex();
    if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
      ObjectsToAllocate.push_back(StackProtectorFI);
  }
  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
    if (MFI.getStackID(I) != TargetStackID::ScalableVector)
      continue;
    if (I == StackProtectorFI)
      continue;
    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
      continue;
    if (MFI.isDeadObjectIndex(I))
      continue;

    ObjectsToAllocate.push_back(I);
  }

  // Allocate all SVE locals and spills.
  for (unsigned FI : ObjectsToAllocate) {
    Align Alignment = MFI.getObjectAlign(FI);
    // FIXME: Given that the length of SVE vectors is not necessarily a power
    // of two, we'd need to align every object dynamically at runtime if the
    // alignment is larger than 16. This is not yet supported.
    if (Alignment > Align(16))
      report_fatal_error(
          "Alignment of scalable vectors > 16 bytes is not yet supported");

    Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
    if (AssignOffsets)
      Assign(FI, -Offset);
  }

  return Offset;
}

int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
    MachineFrameInfo &MFI) const {
  int MinCSFrameIndex, MaxCSFrameIndex;
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
                                        false);
}

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
    MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
                                        true);
}
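// Illustrative sketch (not part of the original source, and simplified: it
// ignores per-object alignment): how determineSVEStackObjectOffsets turns
// object sizes into negative offsets from the top of the SVE area. Two
// one-vector (16-byte-per-vscale) locals land at SP[-16] and SP[-32], and
// the returned total (32) becomes the scalable stack size.
static int64_t exampleSVEOffsets(ArrayRef<int64_t> ObjectSizes) {
  int64_t Offset = 0;
  for (int64_t Size : ObjectSizes)
    Offset += Size; // the real code calls Assign(FI, -Offset) here
  return Offset;    // total scalable stack, in bytes per vscale unit
}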

void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
  AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);

  // If this function isn't doing Win64-style C++ EH, we don't need to do
  // anything.
  if (!MF.hasEHFunclets())
    return;
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();

  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Create an UnwindHelp object.
  // The UnwindHelp object is allocated at the start of the fixed object area.
  int64_t FixedObject =
      getFixedObjectSize(MF, AFI, /*IsWin64=*/true, /*IsFunclet=*/false);
  int UnwindHelpFI = MFI.CreateFixedObject(/*Size=*/8,
                                           /*SPOffset=*/-FixedObject,
                                           /*IsImmutable=*/false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

  // We need to store -2 into the UnwindHelp object at the start of the
  // function.
  DebugLoc DL;
  RS->enterBasicBlockEnd(MBB);
  RS->backward(MBBI);
  Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
  assert(DstReg && "There must be a free register after frame setup");
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
      .addReg(DstReg, getKillRegState(true))
      .addFrameIndex(UnwindHelpFI)
      .addImm(0);
}

namespace {
struct TagStoreInstr {
  MachineInstr *MI;
  int64_t Offset, Size;
  explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
      : MI(MI), Offset(Offset), Size(Size) {}
};

class TagStoreEdit {
  MachineFunction *MF;
  MachineBasicBlock *MBB;
  MachineRegisterInfo *MRI;
  // Tag store instructions that are being replaced.
  SmallVector<TagStoreInstr, 8> TagStores;
  // Combined memref arguments of the above instructions.
  SmallVector<MachineMemOperand *, 8> CombinedMemRefs;

  // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
  // FrameRegOffset + Size) with the address tag of SP.
  Register FrameReg;
  StackOffset FrameRegOffset;
  int64_t Size;
  // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
  // end.
  std::optional<int64_t> FrameRegUpdate;
  // MIFlags for any FrameReg updating instructions.
  unsigned FrameRegUpdateFlags;

  // Use zeroing instruction variants.
  bool ZeroData;
  DebugLoc DL;

  void emitUnrolled(MachineBasicBlock::iterator InsertI);
  void emitLoop(MachineBasicBlock::iterator InsertI);

public:
  TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
      : MBB(MBB), ZeroData(ZeroData) {
    MF = MBB->getParent();
    MRI = &MF->getRegInfo();
  }
  // Add an instruction to be replaced. Instructions must be added in the
  // ascending order of Offset, and have to be adjacent.
  void addInstruction(TagStoreInstr I) {
    assert((TagStores.empty() ||
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
    TagStores.push_back(I);
  }
  void clear() { TagStores.clear(); }
  // Emit equivalent code at the given location, and erase all tag store
  // instructions being replaced.
  void emitCode(MachineBasicBlock::iterator &InsertI,
                const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};

void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();

  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;

  Register BaseReg = FrameReg;
  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
      // BaseReg can be FP, which is not necessarily aligned to 16-bytes. In
      // that case BaseRegOffsetBytes will not be aligned to 16 bytes, which
      // is required for the offset of ST2G.
      BaseRegOffsetBytes % 16 != 0) {
    Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
                    StackOffset::getFixed(BaseRegOffsetBytes), TII);
    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;
  }

  MachineInstr *LastI = nullptr;
  while (Size) {
    int64_t InstrSize = (Size > 16) ? 32 : 16;
    unsigned Opcode =
        InstrSize == 16
            ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
            : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
    assert(BaseRegOffsetBytes % 16 == 0);
    MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
                          .addReg(AArch64::SP)
                          .addReg(BaseReg)
                          .addImm(BaseRegOffsetBytes / 16)
                          .setMemRefs(CombinedMemRefs);
    // A store to [BaseReg, #0] should go last for an opportunity to fold the
    // final SP adjustment in the epilogue.
    if (BaseRegOffsetBytes == 0)
      LastI = I;
    BaseRegOffsetBytes += InstrSize;
    Size -= InstrSize;
  }

  if (LastI)
    MBB->splice(InsertI, MBB, LastI);
}
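// Illustrative sketch (not part of the original source): the granule math
// used by the emitUnrolled loop above. ST2G covers two 16-byte granules per
// instruction and STG covers one, so e.g. 80 bytes of tagged memory take two
// ST2G plus one STG (three instructions). Assumes SizeBytes is a positive
// multiple of 16, as tag stores guarantee.
static unsigned exampleCountTagStores(int64_t SizeBytes) {
  unsigned NumInstrs = 0;
  while (SizeBytes) {
    SizeBytes -= (SizeBytes > 16) ? 32 : 16; // prefer ST2G; STG for the tail
    ++NumInstrs;
  }
  return NumInstrs;
}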

void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();

  Register BaseReg = FrameRegUpdate
                         ? FrameReg
                         : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);

  int64_t LoopSize = Size;
  // If the loop size is not a multiple of 32, split off one 16-byte store at
  // the end to fold the BaseReg update into.
  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;
  MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
                                TII->get(ZeroData ? AArch64::STZGloop_wback
                                                  : AArch64::STGloop_wback))
                            .addDef(SizeReg)
                            .addDef(BaseReg)
                            .addImm(LoopSize)
                            .addReg(BaseReg)
                            .setMemRefs(CombinedMemRefs);
  if (FrameRegUpdate)
    LoopI->setFlags(FrameRegUpdateFlags);

  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
                    << ", Size=" << Size
                    << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
                    << ", FrameRegUpdate=" << FrameRegUpdate
                    << ", FrameRegOffset.getFixed()="
                    << FrameRegOffset.getFixed() << "\n");
  if (LoopSize < Size) {
    assert(FrameRegUpdate);
    assert(Size - LoopSize == 16);
    // Tag 16 more bytes at BaseReg and update BaseReg.
    int64_t STGOffset = ExtraBaseRegUpdate + 16;
    assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
           "STG immediate out of range");
    BuildMI(*MBB, InsertI, DL,
            TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
        .addDef(BaseReg)
        .addReg(BaseReg)
        .addReg(BaseReg)
        .addImm(STGOffset / 16)
        .setMemRefs(CombinedMemRefs)
        .setMIFlags(FrameRegUpdateFlags);
  } else if (ExtraBaseRegUpdate) {
    // Update BaseReg.
    int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
    assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
    BuildMI(
        *MBB, InsertI, DL,
        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
        .addDef(BaseReg)
        .addReg(BaseReg)
        .addImm(AddSubOffset)
        .addImm(0)
        .setMIFlags(FrameRegUpdateFlags);
  }
}

// Check if *II is a register update that can be merged into STGloop that ends
// at (Reg + Size). RemainingOffset is the remaining adjustment to Reg after
// the merge.
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
                       int64_t Size, int64_t *TotalOffset) {
  MachineInstr &MI = *II;
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
      Offset = -Offset;
    int64_t PostOffset = Offset - Size;

    // TagStoreEdit::emitLoop might emit either an ADD/SUB after the loop, or
    // an STGPostIndex which does the last 16 bytes of tag write. Which one is
    // chosen depends on the alignment of the loop size, but the difference
    // between the valid ranges for the two instructions is small, so we
    // conservatively assume that it could be either case here.
    //
    // Max offset of STGPostIndex, minus the 16 byte tag write folded into
    // that instruction.
    const int64_t kMaxOffset = 4080 - 16;
    // Max offset of SUBXri.
    const int64_t kMinOffset = -4095;
    if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
        PostOffset % 16 == 0) {
      *TotalOffset = Offset;
      return true;
    }
  }
  return false;
}
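// Illustrative sketch (not part of the original source): the immediate window
// tested above. A folded base-register update must satisfy both the
// STGPostIndex encoding (signed, scaled by 16, with one 16-byte granule held
// back, hence 4080 - 16) and the SUBXri encoding (down to -4095).
static bool exampleFitsFoldedUpdate(int64_t PostOffset) {
  return PostOffset % 16 == 0 && PostOffset >= -4095 &&
         PostOffset <= 4080 - 16;
}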

void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
                  SmallVectorImpl<MachineMemOperand *> &MemRefs) {
  MemRefs.clear();
  for (auto &TS : TSE) {
    MachineInstr *MI = TS.MI;
    // An instruction without memory operands may access anything. Be
    // conservative and return an empty list.
    if (MI->memoperands_empty()) {
      MemRefs.clear();
      return;
    }
    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
  }
}

void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
                            const AArch64FrameLowering *TFI,
                            bool TryMergeSPUpdate) {
  if (TagStores.empty())
    return;
  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

  Register Reg;
  FrameRegOffset = TFI->resolveFrameOffsetReference(
      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
      /*PreferFP=*/false, /*ForSimm=*/true);
  FrameReg = Reg;
  FrameRegUpdate = std::nullopt;

  mergeMemRefs(TagStores, CombinedMemRefs);

  LLVM_DEBUG({
    dbgs() << "Replacing adjacent STG instructions:\n";
    for (const auto &Instr : TagStores) {
      dbgs() << "  " << *Instr.MI;
    }
  });

  // Size threshold where a loop becomes shorter than a linear sequence of
  // tagging instructions.
  if (Size < kSetTagLoopThreshold) {
    if (TagStores.size() < 2)
      return;
    emitUnrolled(InsertI);
  } else {
    MachineInstr *UpdateInstr = nullptr;
    int64_t TotalOffset = 0;
    if (TryMergeSPUpdate) {
      // See if we can merge base register update into the STGloop.
      // This is done in AArch64LoadStoreOptimizer for "normal" stores,
      // but STGloop is way too unusual for that, and also it only
      // realistically happens in function epilogue. Also, STGloop is expanded
      // before that pass.
      if (InsertI != MBB->end() &&
          canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
                            &TotalOffset)) {
        UpdateInstr = &*InsertI++;
        LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n  "
                          << *UpdateInstr);
      }
    }

    if (!UpdateInstr && TagStores.size() < 2)
      return;

    if (UpdateInstr) {
      FrameRegUpdate = TotalOffset;
      FrameRegUpdateFlags = UpdateInstr->getFlags();
    }
    emitLoop(InsertI);
    if (UpdateInstr)
      UpdateInstr->eraseFromParent();
  }

  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
}

bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
                                        int64_t &Size, bool &ZeroData) {
  MachineFunction &MF = *MI.getParent()->getParent();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
              Opcode == AArch64::STZ2Gi);

  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
      return false;
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
      return false;
    Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
    Size = MI.getOperand(2).getImm();
    return true;
  }

  if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
    Size = 16;
  else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
    Size = 32;
  else
    return false;

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
    return false;

  Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
           16 * MI.getOperand(2).getImm();
  return true;
}

// Detect a run of memory tagging instructions for adjacent stack frame slots,
// and replace them with a shorter instruction sequence:
//   * replace STG + STG with ST2G
//   * replace STGloop + STGloop with STGloop
// This code needs to run when stack slot offsets are already known, but tag
// store instructions are still expanded.
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
                                                const AArch64FrameLowering *TFI,
                                                RegScavenger *RS) {
  bool FirstZeroData;
  int64_t Size, Offset;
  MachineInstr &MI = *II;
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator NextI = ++II;
  if (&MI == &MBB->instr_back())
    return II;
  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
    return II;

  SmallVector<TagStoreInstr, 4> Instrs;
  Instrs.emplace_back(&MI, Offset, Size);

  constexpr int kScanLimit = 10;
  int Count = 0;
  for (MachineBasicBlock::iterator E = MBB->end();
       NextI != E && Count < kScanLimit; ++NextI) {
    MachineInstr &MI = *NextI;
    bool ZeroData;
    int64_t Size, Offset;

    // Collect instructions that update memory tags with a FrameIndex operand
    // and (when applicable) constant size, and whose output registers are
    // dead (the latter is almost always the case in practice). Since these
    // instructions effectively have no inputs or outputs, we are free to skip
    // any non-aliasing instructions in between without tracking used
    // registers.
    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)
        break;
      Instrs.emplace_back(&MI, Offset, Size);
      continue;
    }

    // Only count non-transient instructions toward the scan limit.
    if (!MI.isTransient())
      ++Count;

    // Just in case, stop before the epilogue code starts.
    if (MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy))
      break;

    // Reject anything that may alias the collected instructions.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() || MI.isCall())
      break;
  }

  // New code will be inserted after the last tagging instruction we've found.
  MachineBasicBlock::iterator InsertI = Instrs.back().MI;

  // All the gathered stack tag instructions are merged and placed after the
  // last tag store in the list. Check whether the nzcv flag is live at the
  // insertion point; otherwise it might get clobbered if any STG loops are
  // present.
  //
  // FIXME: This approach of bailing out from merge is conservative in some
  // ways, e.g. the liveness check is done even when no STG loops remain after
  // the merge.
  LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
  LiveRegs.addLiveOuts(*MBB);
  for (auto I = MBB->rbegin();; ++I) {
    MachineInstr &MI = *I;
    if (MI == InsertI)
      break;
    LiveRegs.stepBackward(*I);
  }
  InsertI++;
  if (LiveRegs.contains(AArch64::NZCV))
    return InsertI;

  llvm::stable_sort(Instrs,
                    [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
                      return Left.Offset < Right.Offset;
                    });

  // Make sure that we don't have any overlapping stores.
  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)
      return NextI;
    CurOffset = Instr.Offset + Instr.Size;
  }

  // Find contiguous runs of tagged memory and emit shorter instruction
  // sequences for them when possible.
  TagStoreEdit TSE(MBB, FirstZeroData);
  std::optional<int64_t> EndOffset;
  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {
      // Found a gap.
      TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate=*/false);
      TSE.clear();
    }

    TSE.addInstruction(Instr);
    EndOffset = Instr.Offset + Instr.Size;
  }

  const MachineFunction *MF = MBB->getParent();
  // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
  TSE.emitCode(
      InsertI, TFI,
      /*TryMergeSPUpdate=*/
      !MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF));

  return InsertI;
}
} // namespace

MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
                                              const AArch64FrameLowering *TFI) {
  MachineInstr &MI = *II;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();

  if (MI.getOpcode() != AArch64::VGSavePseudo &&
      MI.getOpcode() != AArch64::VGRestorePseudo)
    return II;

  SMEAttrs FuncAttrs(MF->getFunction());
  bool LocallyStreaming =
      FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  int64_t VGFrameIdx =
      LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
  assert(VGFrameIdx != std::numeric_limits<int>::max() &&
         "Expected FrameIdx for VG");

  unsigned CFIIndex;
  if (MI.getOpcode() == AArch64::VGSavePseudo) {
    const MachineFrameInfo &MFI = MF->getFrameInfo();
    int64_t Offset =
        MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
    CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
        nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
  } else
    CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
        nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));

  MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
                                     TII->get(TargetOpcode::CFI_INSTRUCTION))
                                 .addCFIIndex(CFIIndex);

  MI.eraseFromParent();
  return UnwindInst->getIterator();
}

void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  for (auto &BB : MF)
    for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
      if (requiresSaveVG(MF))
        II = emitVGSaveRestore(II, this);
      if (StackTaggingMergeSetTag)
        II = tryMergeAdjacentSTG(II, this, RS);
    }
}

/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
    const MachineFunction &MF, int FI, Register &FrameReg,
    bool IgnoreSPUpdates) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (IgnoreSPUpdates) {
    LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
                      << MFI.getObjectOffset(FI) << "\n");
    FrameReg = AArch64::SP;
    return StackOffset::getFixed(MFI.getObjectOffset(FI));
  }

  // Go to common code if we cannot provide sp + offset.
  if (MFI.hasVarSizedObjects() ||
      MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
      MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
    return getFrameIndexReference(MF, FI, FrameReg);

  FrameReg = AArch64::SP;
  return getStackOffset(MF, MFI.getObjectOffset(FI));
}

/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer.
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
    const MachineFunction &MF) const {
  return 0;
}

/// Funclets only need to account for space for the callee saved registers,
/// as the locals are accounted for in the parent's stack frame.
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
    const MachineFunction &MF) const {
  // This is the size of the pushed CSRs.
  unsigned CSSize =
      MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
  // This is the amount of stack a funclet needs to allocate.
  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
                 getStackAlign());
}

namespace {
struct FrameObject {
  bool IsValid = false;
  // Index of the object in MFI.
  int ObjectIndex = 0;
  // Group ID this object belongs to.
  int GroupIndex = -1;
  // This object should be placed first (closest to SP).
  bool ObjectFirst = false;
  // This object's group (which always contains the object with
  // ObjectFirst==true) should be placed first.
  bool GroupFirst = false;

  // Used to distinguish between FP and GPR accesses. The values are decided
  // so that they sort FPR < Hazard < GPR and they can be or'd together.
  unsigned Accesses = 0;
  enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
};

class GroupBuilder {
  SmallVector<int, 8> CurrentMembers;
  int NextGroupIndex = 0;
  std::vector<FrameObject> &Objects;

public:
  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
  void AddMember(int Index) { CurrentMembers.push_back(Index); }
  void EndCurrentGroup() {
    if (CurrentMembers.size() > 1) {
      // Create a new group with the current member list. This might remove
      // them from their pre-existing groups. That's OK, dealing with
      // overlapping groups is too hard and unlikely to make a difference.
      LLVM_DEBUG(dbgs() << "group:");
      for (int Index : CurrentMembers) {
        Objects[Index].GroupIndex = NextGroupIndex;
        LLVM_DEBUG(dbgs() << " " << Index);
      }
      LLVM_DEBUG(dbgs() << "\n");
      NextGroupIndex++;
    }
    CurrentMembers.clear();
  }
};

bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
  // Put the invalid objects at the end: they compare greater on the first
  // tuple field, and the consumers below stop at the first invalid entry.
  //
  // With a stack hazard slot, order accesses as FPR < Hazard < GPR so that
  // the hazard padding separates the two stack regions.
  //
  // The "first" object and its group go toward SP, the rest are kept
  // together by group index, and ties are broken by the original object
  // index.
  return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
                         A.GroupIndex, A.ObjectIndex) <
         std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
                         B.GroupIndex, B.ObjectIndex);
}
} // namespace
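// Illustrative sketch (not part of the original source): what the tuple
// comparison above implies for layout. PEI allocates objects top-down, so
// entries that compare smaller end up closer to FP and entries that compare
// larger end up closer to SP; with a hazard slot the Accesses field
// (FPR=1 < Hazard=2 < GPR=4) therefore yields
//   FP -> [FPR objects][hazard padding][GPR objects] -> SP.
static bool exampleHazardOrdering() {
  FrameObject FPRObj, GPRObj;
  FPRObj.IsValid = GPRObj.IsValid = true;
  FPRObj.Accesses = FrameObject::AccessFPR;
  GPRObj.Accesses = FrameObject::AccessGPR;
  // FPR-accessed objects compare smaller, so they are allocated first
  // (closer to FP), leaving GPR objects nearer to SP.
  return FrameObjectCompare(FPRObj, GPRObj); // true
}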

void AArch64FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  if (!OrderFrameObjects || ObjectsToAllocate.empty())
    return;

  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
  for (auto &Obj : ObjectsToAllocate) {
    FrameObjects[Obj].IsValid = true;
    FrameObjects[Obj].ObjectIndex = Obj;
  }

  // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
  // the same time.
  GroupBuilder GB(FrameObjects);
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;

      if (AFI.hasStackHazardSlotIndex()) {
        std::optional<int> FI = getLdStFrameID(MI, MFI);
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
          if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
              AArch64InstrInfo::isFpOrNEON(MI))
            FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
          else
            FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
        }
      }

      int OpIndex;
      switch (MI.getOpcode()) {
      case AArch64::STGloop:
      case AArch64::STZGloop:
        OpIndex = 3;
        break;
      case AArch64::STGi:
      case AArch64::STZGi:
      case AArch64::ST2Gi:
      case AArch64::STZ2Gi:
        OpIndex = 1;
        break;
      default:
        OpIndex = -1;
      }

      int TaggedFI = -1;
      if (OpIndex >= 0) {
        const MachineOperand &MO = MI.getOperand(OpIndex);
        if (MO.isFI()) {
          int FI = MO.getIndex();
          if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
              FrameObjects[FI].IsValid)
            TaggedFI = FI;
        }
      }

      // If this is a stack tagging instruction for a slot that is not part of
      // a group yet, either start a new group or add it to the current one.
      if (TaggedFI >= 0)
        GB.AddMember(TaggedFI);
      else
        GB.EndCurrentGroup();
    }
    // Groups should never span multiple basic blocks.
    GB.EndCurrentGroup();
  }

  if (AFI.hasStackHazardSlotIndex()) {
    FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
        FrameObject::AccessHazard;
    // If a stack object is unknown or both GPR and FPR, sort it into GPR.
    for (auto &Obj : FrameObjects)
      if (!Obj.Accesses ||
          Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
        Obj.Accesses = FrameObject::AccessGPR;
  }

  // If the function's tagged base pointer is pinned to a stack slot, we want
  // to put that slot first when possible. This will likely place it at SP + 0,
  // and save one instruction when generating the base pointer because IRG
  // does not allow an immediate offset.
  std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
  if (TBPI) {
    FrameObjects[*TBPI].ObjectFirst = true;
    FrameObjects[*TBPI].GroupFirst = true;
    int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
    if (FirstGroupIndex >= 0)
      for (FrameObject &Object : FrameObjects)
        if (Object.GroupIndex == FirstGroupIndex)
          Object.GroupFirst = true;
  }

  llvm::stable_sort(FrameObjects, FrameObjectCompare);

  int i = 0;
  for (auto &Obj : FrameObjects) {
    // All invalid items are sorted at the end, so it's safe to stop.
    if (!Obj.IsValid)
      break;
    ObjectsToAllocate[i++] = Obj.ObjectIndex;
  }

  LLVM_DEBUG({
    dbgs() << "Final frame order:\n";
    for (auto &Obj : FrameObjects) {
      if (!Obj.IsValid)
        break;
      dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
      if (Obj.ObjectFirst)
        dbgs() << ", first";
      if (Obj.GroupFirst)
        dbgs() << ", group-first";
      dbgs() << "\n";
    }
  });
}

/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
/// least every ProbeSize bytes. Returns an iterator of the first instruction
/// after the loop. The difference between SP and TargetReg must be an exact
/// multiple of ProbeSize.
MachineBasicBlock::iterator
AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
    MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
    Register TargetReg) const {
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, ExitMBB);

  // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
  // in SUB).
  emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-ProbeSize), TII,
                  MachineInstr::FrameSetup);
  // STR XZR, [SP]
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
      .addReg(AArch64::XZR)
      .addReg(AArch64::SP)
      .addImm(0)
      .setMIFlags(MachineInstr::FrameSetup);
  // CMP SP, TargetReg
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
          AArch64::XZR)
      .addReg(AArch64::SP)
      .addReg(TargetReg)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
      .setMIFlags(MachineInstr::FrameSetup);
  // B.NE Loop
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopMBB)
      .setMIFlags(MachineInstr::FrameSetup);

  LoopMBB->addSuccessor(ExitMBB);
  LoopMBB->addSuccessor(LoopMBB);
  // Synthesize the exit MBB.
  ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(LoopMBB);
  // Update liveins.
  fullyRecomputeLiveIns({ExitMBB, LoopMBB});

  return ExitMBB->begin();
}
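// Illustrative sketch (not part of the original source): the loop emitted
// above, in assembly form, assuming ProbeSize = 4096 and that TargetReg
// already holds the final SP value:
//   LoopMBB:
//     sub  sp, sp, #4096        ; step one probe block down
//     str  xzr, [sp]            ; touch the new page
//     cmp  sp, TargetReg        ; SUBSXrx64 with xzr destination
//     b.ne LoopMBB
//   ExitMBB:
//     ...                       ; SP == TargetReg here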

void AArch64FrameLowering::inlineStackProbeFixed(
    MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
    StackOffset CFAOffset) const {
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  bool HasFP = hasFP(MF);

  DebugLoc DL;
  int64_t ProbeSize = AFI->getStackProbeSize();
  int64_t NumBlocks = FrameSize / ProbeSize;
  int64_t ResidualSize = FrameSize % ProbeSize;

  LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
                    << NumBlocks << " blocks of " << ProbeSize
                    << " bytes, plus " << ResidualSize << " bytes\n");

  // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or
  // ordinary loop.
  if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
    for (int i = 0; i < NumBlocks; ++i) {
      // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
      // in SUB).
      emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                      StackOffset::getFixed(-ProbeSize), TII,
                      MachineInstr::FrameSetup, false, false, nullptr,
                      EmitAsyncCFI && !HasFP, CFAOffset);
      CFAOffset += StackOffset::getFixed(ProbeSize);
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  } else if (NumBlocks != 0) {
    // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
    // encodable in SUB). ScratchReg may temporarily become the CFA register.
    emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
                    StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
    MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
    MBB = MBBI->getParent();
    if (EmitAsyncCFI && !HasFP) {
      // Set the CFA register back to SP.
      const AArch64RegisterInfo &RegInfo =
          *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
      BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  if (ResidualSize != 0) {
    // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not
    // encodable in SUB).
    emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-ResidualSize), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
}
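// Illustrative sketch (not part of the original source): the block/residual
// split computed above. With the default 4096-byte probe size, a 9000-byte
// frame becomes NumBlocks = 2 and ResidualSize = 808; each full block gets an
// unconditional probing store, while the residual is only probed when it
// exceeds StackProbeMaxUnprobedStack.
static std::pair<int64_t, int64_t> exampleProbeSplit(int64_t FrameSize,
                                                     int64_t ProbeSize) {
  return {FrameSize / ProbeSize, FrameSize % ProbeSize};
}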

void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // Get the instructions that need to be replaced. We emit at most two of
  // these. Remember them in order to avoid complications coming from the need
  // to traverse the block while potentially creating more blocks.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB)
    if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
        MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
      ToReplace.push_back(&MI);

  for (MachineInstr *MI : ToReplace) {
    if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
      Register ScratchReg = MI->getOperand(0).getReg();
      int64_t FrameSize = MI->getOperand(1).getImm();
      StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
                                               MI->getOperand(3).getImm());
      inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
                            CFAOffset);
    } else {
      assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
             "Stack probe pseudo-instruction expected");
      const AArch64InstrInfo *TII =
          MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
      Register TargetReg = MI->getOperand(0).getReg();
      (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
    }
    MI->eraseFromParent();
  }
}

struct StackAccess {
  enum AccessType {
    NotAccessed = 0, // Stack object not accessed by load/store instructions.
    GPR = 1 << 0,    // A general purpose register.
    PPR = 1 << 1,    // A predicate register.
    FPR = 1 << 2,    // A floating point, Neon or SVE register.
  };

  int Idx;
  StackOffset Offset;
  int64_t Size;
  unsigned AccessTypes;

  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}

  bool operator<(const StackAccess &Rhs) const {
    return std::make_tuple(start(), Idx) <
           std::make_tuple(Rhs.start(), Rhs.Idx);
  }

  bool isCPU() const {
    // Predicate register load and store instructions execute on the CPU.
    return AccessTypes & (AccessType::GPR | AccessType::PPR);
  }
  bool isSME() const { return AccessTypes & AccessType::FPR; }
  bool isMixed() const { return isCPU() && isSME(); }

  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
  int64_t end() const { return start() + Size; }

  std::string getTypeString() const {
    switch (AccessTypes) {
    case AccessType::FPR:
      return "FPR";
    case AccessType::PPR:
      return "PPR";
    case AccessType::GPR:
      return "GPR";
    case AccessType::NotAccessed:
      return "NA";
    default:
      return "Mixed";
    }
  }

  void print(raw_ostream &OS) const {
    OS << getTypeString() << " stack object at [SP"
       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
    if (Offset.getScalable())
      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
         << " * vscale";
    OS << "]";
  }
};

static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}

void AArch64FrameLowering::emitRemarks(
    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {

  SMEAttrs Attrs(MF.getFunction());
  if (Attrs.hasNonStreamingInterfaceAndBody())
    return;

  unsigned StackHazardSize = getStackHazardSize(MF);
  const uint64_t HazardSize =
      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;

  if (HazardSize == 0)
    return;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Bail if function has no stack objects.
  if (!MFI.hasStackObjects())
    return;

  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());

  size_t NumFPLdSt = 0;
  size_t NumNonFPLdSt = 0;

  // Collect stack accesses via Load/Store instructions.
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
        continue;
      for (MachineMemOperand *MMO : MI.memoperands()) {
        std::optional<int> FI = getMMOFrameID(MMO, MFI);
        if (FI && !MFI.isDeadObjectIndex(*FI)) {
          int FrameIdx = *FI;

          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
            StackAccesses[ArrIdx].Idx = FrameIdx;
            StackAccesses[ArrIdx].Offset =
                getFrameIndexReferenceFromSP(MF, FrameIdx);
            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
          }

          unsigned RegTy = StackAccess::AccessType::GPR;
          if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
            if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
              RegTy = StackAccess::PPR;
            else
              RegTy = StackAccess::FPR;
          } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
            RegTy = StackAccess::FPR;
          }

          StackAccesses[ArrIdx].AccessTypes |= RegTy;

          if (RegTy == StackAccess::FPR)
            ++NumFPLdSt;
          else
            ++NumNonFPLdSt;
        }
      }
    }
  }

  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
    return;

  llvm::sort(StackAccesses);
  StackAccesses.erase(llvm::remove_if(StackAccesses,
                                      [](const StackAccess &S) {
                                        return S.AccessTypes ==
                                               StackAccess::NotAccessed;
                                      }),
                      StackAccesses.end());

  SmallVector<const StackAccess *> MixedObjects;
  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;

  if (StackAccesses.front().isMixed())
    MixedObjects.push_back(&StackAccesses.front());

  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
       It != End; ++It) {
    const auto &First = *It;
    const auto &Second = *(It + 1);

    if (Second.isMixed())
      MixedObjects.push_back(&Second);

    if ((First.isSME() && Second.isCPU()) ||
        (First.isCPU() && Second.isSME())) {
      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
      if (Distance < HazardSize)
        HazardPairs.emplace_back(&First, &Second);
    }
  }

  auto EmitRemark = [&](llvm::StringRef Str) {
    ORE->emit([&]() {
      auto R = MachineOptimizationRemarkAnalysis(
          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
    });
  };

  for (const auto &P : HazardPairs)
    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());

  for (const auto *Obj : MixedObjects)
    EmitRemark(
        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
}

unsigned const MachineRegisterInfo * MRI

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)

Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.

static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL)

static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB)

static void emitCalleeSavedRestores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool SVE)

static void computeCalleeSaveRegisterPairs(MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI, SmallVectorImpl< RegPairInfo > &RegPairs, bool NeedsFrameRecord)

static const unsigned DefaultSafeSPDisplacement

This is the biggest offset to the stack pointer we can encode in aarch64 instructions (without using ...

static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned FixedObject)

static bool needsWinCFI(const MachineFunction &MF)

static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, unsigned DwarfReg)

static cl::opt< bool > StackTaggingMergeSetTag("stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden)

bool requiresGetVGCall(MachineFunction &MF)

bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget, MachineFunction &MF)

bool isVGInstruction(MachineBasicBlock::iterator MBBI)

static std::optional< int > getLdStFrameID(const MachineInstr &MI, const MachineFrameInfo &MFI)

static bool produceCompactUnwindFrame(MachineFunction &MF)

static cl::opt< bool > StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", cl::init(false), cl::Hidden)

static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex, bool AssignOffsets)

static cl::opt< bool > OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden)

static bool windowsRequiresStackProbe(MachineFunction &MF, uint64_t StackSizeInBytes)

static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI, bool *HasWinCFI)

static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, bool NeedsWinCFI, bool IsFirst, const TargetRegisterInfo *TRI)

static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, MachineInstr::MIFlag FrameFlag=MachineInstr::FrameSetup, int CFAOffset=0)

static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, unsigned LocalStackSize)

static StackOffset getSVEStackSize(const MachineFunction &MF)

Returns the size of the entire SVE stackframe (calleesaves + spills).

static cl::opt< bool > DisableMultiVectorSpillFill("aarch64-disable-multivector-spill-fill", cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden)

static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, MachineInstr::MIFlag Flag)

static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB)

static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, LivePhysRegs &LiveRegs)

Collect live registers from the end of MI's parent up to (including) MI in LiveRegs.

cl::opt< bool > EnableHomogeneousPrologEpilog("homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)"))

MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II, const AArch64FrameLowering *TFI)

static bool IsSVECalleeSave(MachineBasicBlock::iterator I)

static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord, bool IsFirst, const TargetRegisterInfo *TRI)

Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.

unsigned findFreePredicateReg(BitVector &SavedRegs)

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg)

static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)

static bool isTargetWindows(const MachineFunction &MF)

static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset)

static int64_t upperBound(StackOffset Size)

static unsigned estimateRSStackSizeLimit(MachineFunction &MF)

Look at each instruction that references stack frames and return the stack size limit beyond which so...

static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max)

returns true if there are any SVE callee saves.

static cl::opt< unsigned > StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), cl::Hidden)

static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE)

static bool isFuncletReturnInstr(const MachineInstr &MI)

static unsigned getStackHazardSize(const MachineFunction &MF)

static void emitShadowCallStackPrologue(const TargetInstrInfo &TII, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool NeedsWinCFI, bool NeedsUnwindInfo)

static std::optional< int > getMMOFrameID(MachineMemOperand *MMO, const MachineFrameInfo &MFI)

static bool requiresSaveVG(MachineFunction &MF)

static unsigned getFixedObjectSize(const MachineFunction &MF, const AArch64FunctionInfo *AFI, bool IsWin64, bool IsFunclet)

Returns the size of the fixed object area (allocated next to sp on entry) On Win64 this may include a...

static const int kSetTagLoopThreshold

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

MachineBasicBlock MachineBasicBlock::iterator MBBI

This file contains the simple types necessary to represent the attributes associated with functions a...

#define CASE(ATTRNAME, AANAME,...)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Analysis containing CSE Info

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

static const HTTPClientCleanup Cleanup

const HexagonInstrInfo * TII

static std::string getTypeString(Type *T)

This file implements the LivePhysRegs utility for tracking liveness of physical registers.

unsigned const TargetRegisterInfo * TRI

static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)

uint64_t IntrinsicInst * II

static const MCPhysReg FPR[]

FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.

This file declares the machine register scavenger class.

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static const unsigned FramePtr

void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override

processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...

MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override

This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...

bool canUseAsPrologue(const MachineBasicBlock &MBB) const override

Check whether or not the given MBB can be used as a prologue for the target.

bool enableStackSlotScavenging(const MachineFunction &MF) const override

Returns true if the stack slot holes in the fixed and callee-save stack area should be used when allo...

bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override

spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...

bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override

restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...

StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI) const override

getFrameIndexReferenceFromSP - This method returns the offset from the stack pointer to the slot of t...

StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const override

getNonLocalFrameIndexReference - This method returns the offset used to reference a frame index locat...

TargetStackID::Value getStackIDForScalableVectors() const override

Returns the StackID that scalable vectors should be associated with.

bool hasFPImpl(const MachineFunction &MF) const override

hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.

void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override

emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.

bool enableCFIFixup(MachineFunction &MF) const override

Returns true if we may need to fix the unwind information for the function.

void resetCFIToInitialState(MachineBasicBlock &MBB) const override

Emit CFI instructions that recreate the state of the unwind information upon fucntion entry.

bool hasReservedCallFrame(const MachineFunction &MF) const override

hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...

bool canUseRedZone(const MachineFunction &MF) const

Can this function use the red zone for local allocations.

void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override

processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...

int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const

unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const

Funclets only need to account for space for the callee saved registers, as the locals are accounted f...

void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override

Order the symbols in the local stack frame.

void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override

void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override

This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...

StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override

getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.

StackOffset resolveFrameOffsetReference(const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, Register &FrameReg, bool PreferFP, bool ForSimm) const

bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex) const override

assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.

StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override

For Win64 AArch64 EH, the offset to the Unwind object is from the SP before the update.

StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const

unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override

The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve the parent's frame pointer...

AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...

bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) const

void setSwiftAsyncContextFrameIdx(int FI)

unsigned getTailCallReservedStack() const

unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const

void setCalleeSaveBaseToFrameRecordOffset(int Offset)

bool hasStackProbing() const

unsigned getArgumentStackToRestore() const

void setLocalStackSize(uint64_t Size)

void setVGIdx(unsigned Idx)

int getCalleeSaveBaseToFrameRecordOffset() const

bool hasStreamingModeChanges() const

bool shouldSignReturnAddress(const MachineFunction &MF) const

void setPredicateRegForFillSpill(unsigned Reg)

int getStackHazardSlotIndex() const

void setStreamingVGIdx(unsigned FrameIdx)

int64_t getStackProbeSize() const

uint64_t getStackSizeSVE() const

void setHasRedZone(bool s)

bool hasStackFrame() const

std::optional< int > getTaggedBasePointerIndex() const

uint64_t getLocalStackSize() const

void setStackRealigned(bool s)

bool needsDwarfUnwindInfo(const MachineFunction &MF) const

unsigned getVarArgsGPRSize() const

void setStackSizeSVE(uint64_t S)

bool isStackRealigned() const

bool hasSwiftAsyncContext() const

bool hasStackHazardSlotIndex() const

void setTaggedBasePointerOffset(unsigned Offset)

void setStackHazardCSRSlotIndex(int Index)

unsigned getPredicateRegForFillSpill() const

unsigned getSVECalleeSavedStackSize() const

bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const

int64_t getStreamingVGIdx() const

void setMinMaxSVECSFrameIndex(int Min, int Max)

bool hasCalleeSaveStackFreeSpace() const

static bool isTailCallReturnInst(const MachineInstr &MI)

Returns true if MI is one of the TCRETURN* instructions.

static bool isSEHInstruction(const MachineInstr &MI)

Return true if the instructions is a SEH instruciton used for unwinding on Windows.

static bool isFpOrNEON(Register Reg)

Returns whether the physical register is FP or NEON.

bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const

bool hasBasePointer(const MachineFunction &MF) const

bool cannotEliminateFrame(const MachineFunction &MF) const

unsigned getBaseRegister() const

bool isTargetWindows() const

const AArch64RegisterInfo * getRegisterInfo() const override

bool isNeonAvailable() const

Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....

const AArch64InstrInfo * getInstrInfo() const override

bool isTargetILP32() const

const AArch64TargetLowering * getTargetLowering() const override

bool isTargetMachO() const

const Triple & getTargetTriple() const

const char * getChkStkName() const

bool isSVEorStreamingSVEAvailable() const

Returns true if the target has access to either the full range of SVE instructions,...

bool isStreaming() const

Returns true if the function has a streaming body.

bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const

bool swiftAsyncContextIsDynamicallySet() const

Return whether FrameLowering should always set the "extended frame present" bit in FP,...

bool hasInlineStackProbe(const MachineFunction &MF) const override

True if stack clash protection is enabled for this functions.

unsigned getRedZoneSize(const Function &F) const

bool supportSwiftError() const override

Return true if the target supports swifterror attribute.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const

Return true if the specified attribute is set for at least one parameter or for the return value.

bool test(unsigned Idx) const

size_type count() const

count - Returns the number of bits which are set.

iterator_range< const_set_bits_iterator > set_bits() const

The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...

bool hasOptSize() const

Optimize this function for size (-Os) or minimum size (-Oz).

bool hasMinSize() const

Optimize this function for minimum size (-Oz).

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

AttributeList getAttributes() const

Return the attribute list for this Function.

bool isVarArg() const

isVarArg - Return true if this function takes a variable number of arguments.

bool hasFnAttribute(Attribute::AttrKind Kind) const

Return true if the function has the attribute.

void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override

Emit instructions to copy a pair of physical registers.

A set of physical registers with utility functions to track liveness when walking backward/forward th...

bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const

Returns true if register Reg and no aliasing register is in the set.

void stepBackward(const MachineInstr &MI)

Simulates liveness when stepping backwards over an instruction(bundle).

void removeReg(MCPhysReg Reg)

Removes a physical register, all its sub-registers, and all its super-registers from the set.

void addLiveIns(const MachineBasicBlock &MBB)

Adds all live-in registers of basic block MBB.

void addLiveOuts(const MachineBasicBlock &MBB)

Adds all live-out registers of basic block MBB.

void addReg(MCPhysReg Reg)

Adds a physical register and all its sub-registers to the set.

bool usesWindowsCFI() const

static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})

.cfi_def_cfa_register modifies a rule for computing CFA.

static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})

.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...

static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})

.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.

static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})

.cfi_offset Previous value of Register is saved at offset Offset from CFA.

static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L, SMLoc Loc={})

.cfi_negate_ra_state_with_pc AArch64 negate RA state with PC.

static MCCFIInstruction createNegateRAState(MCSymbol *L, SMLoc Loc={})

.cfi_negate_ra_state AArch64 negate RA state.

static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})

.cfi_def_cfa_offset modifies a rule for computing CFA.

static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")

.cfi_escape Allows the user to add arbitrary bytes to the unwind info.

static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})

.cfi_same_value Current value of Register is the same as in the previous frame.

MCSymbol * createTempSymbol()

Create a temporary symbol with a unique name.

Describe properties that are true of each instruction in the target description file.

Wrapper class representing physical registers. Should be passed by value.

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)

Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...

instr_iterator instr_begin()

iterator_range< livein_iterator > liveins() const

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

bool isEHFuncletEntry() const

Returns true if this is the entry block of an EH funclet.

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

MachineInstr & instr_back()

void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())

Add Succ as a successor of this MachineBasicBlock.

DebugLoc findDebugLoc(instr_iterator MBBI)

Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.

iterator getLastNonDebugInstr(bool SkipPseudoOp=true)

Returns an iterator to the last non-debug instruction in the basic block, or end().

instr_iterator instr_end()

void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())

Adds the specified register as a live in.

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

reverse_iterator rbegin()

iterator insertAfter(iterator I, MachineInstr *MI)

Insert MI into the instruction list after I.

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const

Return true if the specified register is in the live in set.

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)

Create a new object at a fixed location on the stack.

bool hasVarSizedObjects() const

This method may be called any time after instruction selection is complete to determine if the stack ...

uint64_t getStackSize() const

Return the number of bytes that must be allocated to hold all of the fixed size frame objects.

const AllocaInst * getObjectAllocation(int ObjectIdx) const

Return the underlying Alloca of the specified stack object if it exists.

int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)

Create a new statically sized stack object, returning a nonnegative identifier to represent it.

bool hasCalls() const

Return true if the current function has any function calls.

bool isFrameAddressTaken() const

This method may be called any time after instruction selection is complete to determine if there is a...

Align getMaxAlign() const

Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...

void setObjectOffset(int ObjectIdx, int64_t SPOffset)

Set the stack frame offset of the specified object.

uint64_t getMaxCallFrameSize() const

Return the maximum size of a call frame that must be allocated for an outgoing function call.

bool hasPatchPoint() const

This method may be called any time after instruction selection is complete to determine if there is a...

int getStackProtectorIndex() const

Return the index for the stack protector object.

int CreateSpillStackObject(uint64_t Size, Align Alignment)

Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...

uint64_t estimateStackSize(const MachineFunction &MF) const

Estimate and return the size of the stack frame.

void setStackID(int ObjectIdx, uint8_t ID)

bool isCalleeSavedInfoValid() const

Has the callee saved info been calculated yet?

Align getObjectAlign(int ObjectIdx) const

Return the alignment of the specified stack object.

int64_t getObjectSize(int ObjectIdx) const

Return the size of the specified object.

bool isMaxCallFrameSizeComputed() const

bool hasStackMap() const

This method may be called any time after instruction selection is complete to determine if there is a...

const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const

Returns a reference to call saved info vector for the current function.

unsigned getNumObjects() const

Return the number of objects.

int getObjectIndexEnd() const

Return one past the maximum frame object index.

bool hasStackProtectorIndex() const

bool hasStackObjects() const

Return true if there are any stack objects in this function.

uint8_t getStackID(int ObjectIdx) const

unsigned getNumFixedObjects() const

Return the number of fixed objects.

int64_t getObjectOffset(int ObjectIdx) const

Return the assigned stack offset of the specified object from the incoming stack pointer.

int getObjectIndexBegin() const

Return the minimum frame object index.

void setObjectAlignment(int ObjectIdx, Align Alignment)

setObjectAlignment - Change the alignment of the specified stack object.

bool isDeadObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a dead object.

const WinEHFuncInfo * getWinEHFuncInfo() const

getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.

unsigned addFrameInst(const MCCFIInstruction &Inst)

void setHasWinCFI(bool v)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MCContext & getContext() const

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineBasicBlock & front() const

bool hasEHFunclets() const

MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)

CreateMachineBasicBlock - Allocate a new MachineBasicBlock.

void insert(iterator MBBI, MachineBasicBlock *MBB)

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const

const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const

const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const

const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addFrameIndex(int Idx) const

const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register use operand.

const MachineInstrBuilder & setMIFlags(unsigned Flags) const

const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

Representation of each machine instruction.

void setFlags(unsigned flags)

void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

uint32_t getFlags() const

Return the MI flags bitvector.

A description of a memory reference used in the backend.

const PseudoSourceValue * getPseudoValue() const

@ MOLoad

The memory access reads data.

@ MOStore

The memory access writes data.

const Value * getValue() const

Return the base address of the memory access.

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

static MachineOperand CreateImm(int64_t Val)

bool isFI() const

isFI - Tests if this is a MO_FrameIndex operand.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")

createVirtualRegister - Create and return a new virtual register in the function with the specified r...

bool isLiveIn(Register Reg) const

const MCPhysReg * getCalleeSavedRegs() const

Returns list of callee saved registers.

bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const

Return true if the specified register is modified or read in this function.

MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...

void enterBasicBlockEnd(MachineBasicBlock &MBB)

Start tracking liveness from the end of basic block MBB.

Register FindUnusedReg(const TargetRegisterClass *RC) const

Find an unused register of the specified register class.

void backward()

Update internal register state and move MBB iterator backwards.

void addScavengingFrameIndex(int FI)

Add a scavenging frame index.

Wrapper class representing virtual and physical registers.

SMEAttrs is a utility class to parse the SME ACLE attributes on functions.

bool hasStreamingInterface() const

bool hasStreamingBody() const

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StackOffset holds a fixed and a scalable offset in bytes.

int64_t getFixed() const

Returns the fixed component of the stack.

int64_t getScalable() const

Returns the scalable component of the stack.

static StackOffset get(int64_t Fixed, int64_t Scalable)

static StackOffset getScalable(int64_t Scalable)

static StackOffset getFixed(int64_t Fixed)

StringRef - Represent a constant reference to a string, i.e.

bool hasFP(const MachineFunction &MF) const

hasFP - Return true if the specified function should have a dedicated frame pointer register.

virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const

This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...

int getOffsetOfLocalArea() const

getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...

Align getStackAlign() const

getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...

StackDirection getStackGrowthDirection() const

getStackGrowthDirection - Return the direction the stack grows

virtual bool enableCFIFixup(MachineFunction &MF) const

Returns true if we may need to fix the unwind information for the function.

TargetInstrInfo - Interface to description of machine instruction set.

CodeModel::Model getCodeModel() const

Returns the code model.

const MCAsmInfo * getMCAsmInfo() const

Return target specific asm information.

SwiftAsyncFramePointerMode SwiftAsyncFramePointer

Control when and how the Swift async frame pointer bit should be set.

bool DisableFramePointerElim(const MachineFunction &MF) const

DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

const TargetRegisterClass * getMinimalPhysRegClass(MCRegister Reg, MVT VT=MVT::Other) const

Returns the Register Class of a physical register of the given type, picking the most sub register cl...

Align getSpillAlign(const TargetRegisterClass &RC) const

Return the minimum required alignment in bytes for a spill slot for a register of this class.

bool hasStackRealignment(const MachineFunction &MF) const

True if stack realignment is required and still possible.

unsigned getSpillSize(const TargetRegisterClass &RC) const

Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...

TargetSubtargetInfo - Generic base class for all target subtargets.

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

virtual const TargetInstrInfo * getInstrInfo() const

StringRef getArchName() const

Get the architecture (first) component of the triple.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

The instances of the Type class are immutable: once they are created, they are never changed.

constexpr ScalarTy getFixedValue() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ MO_GOT

MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...

static unsigned getShiftValue(unsigned Imm)

getShiftValue - Extract the shift value.

static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)

getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...

static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)

encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...

static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)

getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...

const unsigned StackProbeMaxLoopUnroll

Maximum number of iterations to unroll for a constant size probing loop.

const unsigned StackProbeMaxUnprobedStack

Maximum allowed number of unprobed bytes above SP at an ABI boundary.

constexpr char Attrs[]

Key for Kernel::Metadata::mAttrs.

@ AArch64_SVE_VectorCall

Used between AArch64 SVE functions.

@ PreserveMost

Used for runtime calls that preserves most registers.

@ CXX_FAST_TLS

Used for access functions.

@ GHC

Used by the Glasgow Haskell Compiler (GHC).

@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1

Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.

@ PreserveAll

Used for runtime calls that preserves (almost) all registers.

@ PreserveNone

Used for runtime calls that preserves none general registers.

@ Win64

The C convention as implemented on Windows/x86-64 and AArch64.

@ SwiftTail

This follows the Swift calling convention in how arguments are passed but guarantees tail calls will be made by making the callee clean up their stack.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

@ Define

Register definition.

@ Kill

The last use of a register.

@ Undef

Value of the register doesn't matter.

Reg

All possible values of the reg field in the ModR/M byte.

initializer< Ty > init(const Ty &Val)

NodeAddr< InstrNode * > Instr

This is an optimization pass for GlobalISel generic memory operations.

void stable_sort(R &&Range)

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
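
A minimal sketch of its typical use, assuming MF/MBB/MBBI/DL/TII/TRI from the surrounding frame-lowering code: after "sub sp, sp, #16", record the new CFA and emit the CFI pseudo-instruction that carries it.

  unsigned CFIIndex = MF.addFrameInst(
      createDefCFA(*TRI, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
                   StackOffset::getFixed(16),
                   /*LastAdjustmentWasScalable=*/false));
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);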

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.
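
A hedged sketch of the builder pattern as used throughout this file: create the instruction, append operands fluently, then tag it for the unwinder.

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXri), AArch64::SP)
      .addReg(AArch64::SP)
      .addImm(16) // allocate 16 bytes (illustrative)
      .addImm(0)  // no shift
      .setMIFlag(MachineInstr::FrameSetup);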

int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)

Check if the Offset is a valid frame offset for MI.
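
Sketch of a typical query (MI and the 504-byte offset are hypothetical); the status can be compared against AArch64FrameOffsetCannotUpdate, documented below:

  StackOffset Offset = StackOffset::getFixed(504);
  int64_t Emittable;
  int Status =
      isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, &Emittable);
  if (Status == AArch64FrameOffsetCannotUpdate)
    report_fatal_error("cannot fold frame offset into instruction");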

detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
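
A small sketch: the returned object runs the callable on every exit path from the enclosing scope, which is handy around early returns.

  auto Banner = make_scope_exit(
      [] { LLVM_DEBUG(dbgs() << "frame lowering step done\n"); });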

MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

unsigned getBLRCallOpcode(const MachineFunction &MF)

Return opcode to be used for indirect calls.

const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)

This method strips off any GEP address adjustments, pointer casts, or llvm.threadlocal.address from the specified value V, returning the original object being addressed.

@ AArch64FrameOffsetCannotUpdate

Offset cannot apply.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
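
For instance, a hedged sketch (the CSI vector is hypothetical) testing whether any callee-save is a 64-bit floating-point register:

  bool HasFPR = any_of(CSI, [](const CalleeSavedInfo &I) {
    return AArch64::FPR64RegClass.contains(I.getReg());
  });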

auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)

auto reverse(ContainerTy &&C)

void sort(IteratorTy Start, IteratorTy End)

@ Always

Always set the bit.

@ Never

Never set the bit.

@ DeploymentBased

Determine whether to set the bit statically or dynamically based on the deployment target.

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)

emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
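
A minimal sketch of the common prologue use: lower SP by 32 bytes, letting the helper split the adjustment into legal ADD/SUB immediates.

  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-32), TII, MachineInstr::FrameSetup);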

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

EHPersonality classifyEHPersonality(const Value *Pers)

See if the given exception handling personality function is one that we understand.

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

auto remove_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.

unsigned getDefRegState(bool B)

unsigned getKillRegState(bool B)
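
These wrap a boolean into register-operand flags. A hedged spill sketch, with Reg, IsKill, and the frame index FI all hypothetical:

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addReg(Reg, getKillRegState(IsKill)) // kill flag only if IsKill
      .addFrameIndex(FI)
      .addImm(0);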

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.
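
For example, rounding a 40-byte area up to a 16-byte boundary:

  uint64_t Padded = alignTo(/*Size=*/40, Align(16)); // == 48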

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

bool isAsynchronousEHPersonality(EHPersonality Pers)

Returns true if this personality function catches asynchronous exceptions.

void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)

Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
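
Sketch (block names hypothetical): after splitting blocks or inserting epilogue code, refresh the affected live-in lists until they stabilize.

  fullyRecomputeLiveIns({&EpilogBB, &TailBB});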

Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

bool operator<(const StackAccess &Rhs) const

void print(raw_ostream &OS) const

std::string getTypeString() const

This struct is a compact representation of a valid (non-zero power of two) alignment.

uint64_t value() const

This is a hole in the type system and should not be abused.

Description of the encoding of one expression Op.

Pair of physical register and lane mask.

static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)

Return a MachinePointerInfo record that refers to the specified FrameIndex.
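
A hedged sketch (frame index FI hypothetical): build a MachineMemOperand for an 8-byte store to a fixed stack slot so later passes can reason about the access.

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      8, Align(8));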