LLVM: lib/Target/AArch64/AArch64FrameLowering.cpp Source File

#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
                                       cl::desc("sort stack allocations"),
                                       cl::init(true), cl::Hidden);

static cl::opt<bool>
    SplitSVEObjects("aarch64-split-sve-objects",
                    cl::desc("Split allocation of ZPR & PPR objects"),
                    cl::init(false), cl::Hidden);

cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                         cl::init(0), cl::Hidden);
// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
static cl::opt<unsigned>
    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
                          cl::Hidden);

static cl::opt<bool> DisableMultiVectorSpillFill(
    "aarch64-disable-multivector-spill-fill",
    cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false),
    cl::Hidden);

int64_t
AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool IsTailCallReturn = (MBB.end() != MBBI)
                              ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
                              : false;

  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here.
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by LowerFormalArguments.
    // This will, of course, be zero for the C calling convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

333

336

338

339

340

343

347

353

356

357

358

361 : 0,

363}

364

365

366

367

372 return true;

373

376

380 return true;

381 }

382

383 return false;

384}

385

386

387

388

bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const {
  if (!MF.getFunction().hasMinSize())
    return false;
  if (!EnableHomogeneousPrologEpilog)
    return false;
  if (EnableRedZone)
    return false;

  // TODO: Windows is not supported yet.
  if (needsWinCFI(MF))
    return false;

  // TODO: SVE is not supported yet.
  if (getSVEStackSize(MF))
    return false;

  // Bail on stack adjustment needed on return for simplicity.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
    return false;
  if (Exit && getArgumentStackToRestore(MF, *Exit))
    return false;

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
    return false;

  // If there are an odd number of GPRs before LR and FP in the CSRs list,
  // they will not be paired into one RegPairInfo, which is incompatible with
  // the assumption made by the homogeneous prolog epilog pass.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    Register Reg = CSRegs[I];
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
        return false;
      break;
    }
    if (AArch64::GPR64RegClass.contains(Reg))
      ++NumGPRs;
  }

  return true;
}

437

438

439bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {

441}

442

443

444

445

446

448

449

450

451

453

454

455

458 if (MI.isDebugInstr() || MI.isPseudo() ||

459 MI.getOpcode() == AArch64::ADDXri ||

460 MI.getOpcode() == AArch64::ADDSXri)

461 continue;

462

464 if (!MO.isFI())

465 continue;

466

470 return 0;

471 }

472 }

473 }

475}

476

481

482unsigned

483AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,

485 bool IsWin64, bool IsFunclet) const {

487 "Tail call reserved stack must be aligned to 16 bytes");

488 if (!IsWin64 || IsFunclet) {

490 } else {

493 Attribute::SwiftAsync))

494 report_fatal_error("cannot generate ABI-changing tail call for Win64");

496

497

499

501

507 int FrameIndex = H.CatchObj.FrameIndex;

508 if ((FrameIndex != INT_MAX) &&

509 CatchObjFrameIndices.insert(FrameIndex)) {

510 FixedObjectSize = alignTo(FixedObjectSize,

513 }

514 }

515 }

516

517 FixedObjectSize += 8;

518 }

519 return alignTo(FixedObjectSize, 16);

520 }

521}

522

525 return false;

526

527

528

530 const unsigned RedZoneSize =

532 if (!RedZoneSize)

533 return false;

534

538

539

540

541

542

543 bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&

545 !Subtarget.hasSVE();

546

547 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||

549}

550

551

552

557

558

559

560

562 return true;

563

565 return true;

568 RegInfo->hasStackRealignment(MF))

569 return true;

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

592 return true;

593

594

595

596

597

598

599

600

603 return true;

604

605 return false;

606}

607

608

612

613

614

615 if (TT.isOSDarwin() || TT.isOSWindows())

616 return true;

617

618

620 return true;

621

622

624 return true;

625

626 return false;

627}

628

629

630

631

632

633

643

647

653 unsigned Opc = I->getOpcode();

654 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();

655 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

656

658 int64_t Amount = I->getOperand(0).getImm();

660 if (!IsDestroy)

661 Amount = -Amount;

662

663

664

665

666 if (CalleePopAmount == 0) {

667

668

669

670

671

672

673

674

675

676

677 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

678

681

682

683

684

685

686

688 "non-reserved call frame without var sized objects?");

691 inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));

692 } else {

695 }

696 }

697 } else if (CalleePopAmount != 0) {

698

699

700 assert(CalleePopAmount < 0xffffff && "call frame too large");

703 }

704 return MBB.erase(I);

705}

706

709

712 const auto &TRI = *Subtarget.getRegisterInfo();

714

716

717

718 CFIBuilder.buildDefCFA(AArch64::SP, 0);

719

720

721 if (MFI.shouldSignReturnAddress(MF))

722 MFI.branchProtectionPAuthLR() ? CFIBuilder.buildNegateRAStateWithPC()

723 : CFIBuilder.buildNegateRAState();

724

725

726 if (MFI.needsShadowCallStackPrologueEpilogue(MF))

727 CFIBuilder.buildSameValue(AArch64::X18);

728

729

  const std::vector<CalleeSavedInfo> &CSI =

732 for (const auto &Info : CSI) {

734 if (TRI.regNeedsCFI(Reg, Reg))

735 continue;

736 CFIBuilder.buildSameValue(Reg);

737 }

738}

739

741 switch (Reg.id()) {

742 default:

743

744

745 return 0;

746

747

748#define CASE(n) \

749 case AArch64::W##n: \

750 case AArch64::X##n: \

751 return AArch64::X##n

771#undef CASE

772

773

774#define CASE(n) \

775 case AArch64::B##n: \

776 case AArch64::H##n: \

777 case AArch64::S##n: \

778 case AArch64::D##n: \

779 case AArch64::Q##n: \

780 return HasSVE ? AArch64::Z##n : AArch64::Q##n

813#undef CASE

814 }

815}

816

817void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,

819

821

822

825 DL = MBBI->getDebugLoc();

826

827 const MachineFunction &MF = *MBB.getParent();

  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();

830

831 BitVector GPRsToZero(TRI.getNumRegs());

832 BitVector FPRsToZero(TRI.getNumRegs());

834 for (MCRegister Reg : RegsToZero.set_bits()) {

835 if (TRI.isGeneralPurposeRegister(MF, Reg)) {

836

838 GPRsToZero.set(XReg);

840

842 FPRsToZero.set(XReg);

843 }

844 }

845

847

848

849 for (MCRegister Reg : GPRsToZero.set_bits())

851

852

853 for (MCRegister Reg : FPRsToZero.set_bits())

855

  if (HasSVE) {
    for (MCRegister PReg :
         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
          AArch64::P15}) {
      if (RegsToZero[PReg])
        BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
    }
  }
}

867

bool AArch64FrameLowering::windowsRequiresStackProbe(
    const MachineFunction &MF, uint64_t StackSizeInBytes) const {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
  // TODO: When implementing stack protectors, take that into account
  // for the probe threshold.
  return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
         StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
}

static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
                                   const MachineBasicBlock &MBB) {
  const MachineFunction *MF = MBB.getParent();
  LiveRegs.addLiveIns(MBB);
  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);
}

Register
AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                                       bool HasCall) const {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register.
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  getLiveRegsForEntryMBB(LiveRegs, *MBB);
  if (HasCall) {
    LiveRegs.addReg(AArch64::X16);
    LiveRegs.addReg(AArch64::X17);
    LiveRegs.addReg(AArch64::X18);
  }

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

921

930

936

937

938 if (LiveRegs.available(MRI, AArch64::X16) ||

940 return false;

941 }

942

943

944

946 MBB.isLiveIn(AArch64::NZCV))

947 return false;

948

950 if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister)

951 return false;

952

953

954

956 windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))

957 if (findScratchNonCalleeSaveRegister(TmpMBB, true) == AArch64::NoRegister)

958 return false;

959

960 return true;

961}

962

966 F.needsUnwindTableEntry();

967}

968

969bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(

971

972

974 return false;

977}

978

979

980

985 unsigned Opc = MBBI->getOpcode();

989 unsigned ImmIdx = MBBI->getNumOperands() - 1;

990 int Imm = MBBI->getOperand(ImmIdx).getImm();

994

995 switch (Opc) {

996 default:

998 case AArch64::STR_ZXI:

999 case AArch64::LDR_ZXI: {

1000 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1001 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveZReg))

1005 break;

1006 }

1007 case AArch64::STR_PXI:

1008 case AArch64::LDR_PXI: {

1009 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1014 break;

1015 }

1016 case AArch64::LDPDpost:

1018 [[fallthrough]];

1019 case AArch64::STPDpre: {

1020 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1021 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());

1022 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))

1027 break;

1028 }

1029 case AArch64::LDPXpost:

1031 [[fallthrough]];

1032 case AArch64::STPXpre: {

1033 Register Reg0 = MBBI->getOperand(1).getReg();

1034 Register Reg1 = MBBI->getOperand(2).getReg();

1035 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)

1039 else

1041 .addImm(RegInfo->getSEHRegNum(Reg0))

1042 .addImm(RegInfo->getSEHRegNum(Reg1))

1045 break;

1046 }

1047 case AArch64::LDRDpost:

1049 [[fallthrough]];

1050 case AArch64::STRDpre: {

1051 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1056 break;

1057 }

1058 case AArch64::LDRXpost:

1060 [[fallthrough]];

1061 case AArch64::STRXpre: {

1062 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1067 break;

1068 }

1069 case AArch64::STPDi:

1070 case AArch64::LDPDi: {

1071 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1072 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1078 break;

1079 }

1080 case AArch64::STPXi:

1081 case AArch64::LDPXi: {

1082 Register Reg0 = MBBI->getOperand(0).getReg();

1083 Register Reg1 = MBBI->getOperand(1).getReg();

1084

1085 int SEHReg0 = RegInfo->getSEHRegNum(Reg0);

1086 int SEHReg1 = RegInfo->getSEHRegNum(Reg1);

1087

1088 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)

1092 else if (SEHReg0 >= 19 && SEHReg1 >= 19)

1098 else

1099 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegIP))

1104 break;

1105 }

1106 case AArch64::STRXui:

1107 case AArch64::LDRXui: {

1108 int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1109 if (Reg >= 19)

1114 else

1115 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegI))

1119 break;

1120 }

1121 case AArch64::STRDui:

1122 case AArch64::LDRDui: {

1123 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1128 break;

1129 }

1130 case AArch64::STPQi:

1131 case AArch64::LDPQi: {

1132 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

1133 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1134 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))

1139 break;

1140 }

1141 case AArch64::LDPQpost:

1143 [[fallthrough]];

1144 case AArch64::STPQpre: {

1145 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

1146 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());

1147 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))

1152 break;

1153 }

1154 }

1156 return I;

1157}

1158

1162 return false;

1163

1164

1166 if (ST.isTargetDarwin())

1167 return ST.hasSVE();

1168 return true;

1169}

1170

1174

1179

1181 DebugLoc DL;

1183

1186 };

1187

1192 DL = MBBI->getDebugLoc();

1193

1196 };

1197

1198

1199 EmitSignRA(MF.front());

1201 if (MBB.isEHFuncletEntry())

1202 EmitSignRA(MBB);

1203 if (MBB.isReturnBlock())

1204 EmitAuthRA(MBB);

1205 }

1206}

1207

1213

1219

1224

1229

1230

1231

1232

1233

1238 MF, FI, FrameReg,

1239

1242 false);

1243}

1244

1247 int FI) const {

1248

1249

1250

1251

1252

1253

1255

1259 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;

1260

1261

1262

1263

1264 if (MFI.isVariableSizedObjectIndex(FI)) {

1266 }

1267

1268

1269 if (!SVEStackSize)

1271

1273 bool FPAfterSVECalleeSaves =

1275 if (MFI.hasScalableStackID(FI)) {

1276 if (FPAfterSVECalleeSaves &&

1279 "split-sve-objects not supported with FPAfterSVECalleeSaves");

1281 }

1283

1284

1287 AccessOffset = -PPRStackSize;

1288 return AccessOffset +

1290 ObjectOffset);

1291 }

1292

1293 bool IsFixed = MFI.isFixedObjectIndex(FI);

1294 bool IsCSR =

1296

1298 if (!IsFixed && !IsCSR) {

1299 ScalableOffset = -SVEStackSize;

1300 } else if (FPAfterSVECalleeSaves && IsCSR) {

1301 ScalableOffset =

1303 }

1304

1306}

1307

1310 int FI) const {

1312}

1313

1315 int64_t ObjectOffset) const {

1319 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());

1320 unsigned FixedObject =

1321 getFixedObjectSize(MF, AFI, IsWin64, false);

1323 int64_t FPAdjust =

1326}

1327

1329 int64_t ObjectOffset) const {

1332}

1333

1334

1336 int FI) const {

1340 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

1341 ? getFPOffset(MF, ObjectOffset).getFixed()

1342 : getStackOffset(MF, ObjectOffset).getFixed();

1343}

1344

1347 bool ForSimm) const {

1349 int64_t ObjectOffset = MFI.getObjectOffset(FI);

1350 bool isFixed = MFI.isFixedObjectIndex(FI);

1353 FrameReg, PreferFP, ForSimm);

1354}

1355

1357 const MachineFunction &MF, int64_t ObjectOffset, bool isFixed,

1359 bool ForSimm) const {

1364

1365 int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();

1366 int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();

1367 bool isCSR =

1369 bool isSVE = MFI.isScalableStackID(StackID);

1370

1373 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;

1374

1375

1376

1377

1378

1379 bool UseFP = false;

1381

1382

1383

1384 PreferFP &= !SVEStackSize;

1385

1386

1387

1388

1389

1390 if (isFixed) {

1391 UseFP = hasFP(MF);

1392 } else if (isCSR && RegInfo->hasStackRealignment(MF)) {

1393

1394

1395

1396 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");

1397 UseFP = true;

1398 } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {

1399

1400

1401

1402

1403 bool FPOffsetFits = !ForSimm || FPOffset >= -256;

1404 PreferFP |= Offset > -FPOffset && !SVEStackSize;

1405

1406 if (FPOffset >= 0) {

1407

1408

1409 UseFP = true;

1410 } else if (MFI.hasVarSizedObjects()) {

1411

1412

1413

1414 bool CanUseBP = RegInfo->hasBasePointer(MF);

1415 if (FPOffsetFits && CanUseBP)

1416 UseFP = PreferFP;

1417 else if (!CanUseBP)

1418 UseFP = true;

1419

1420

1421

1422 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {

1423

1424

1425

1426 (void) Subtarget;

1429 "Funclets should only be present on Win64");

1430 UseFP = true;

1431 } else {

1432

1433 if (FPOffsetFits && PreferFP)

1434 UseFP = true;

1435 }

1436 }

1437 }

1438

1440 ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&

1441 "In the presence of dynamic stack pointer realignment, "

1442 "non-argument/CSR objects cannot be accessed through the frame pointer");

1443

1444 bool FPAfterSVECalleeSaves =

1446

1447 if (isSVE) {

1451 SVEStackSize +

1453 ObjectOffset);

1454

1455

1456

1458

1459

1460 FPOffset -= PPRStackSize;

1461

1462 SPOffset -= PPRStackSize;

1463

1464

1465 }

1466

1467 if (FPAfterSVECalleeSaves) {

1472 }

1473 }

1474

1475

1478 RegInfo->hasStackRealignment(MF))) {

1479 FrameReg = RegInfo->getFrameRegister(MF);

1480 return FPOffset;

1481 }

1482 FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()

1484

1485 return SPOffset;

1486 }

1487

1489 if (FPAfterSVECalleeSaves) {

1490

1491

1494 if (UseFP) {

1495 if (isFixed)

1496 SVEAreaOffset = SVECalleeSavedStack;

1497 else if (!isCSR)

1498 SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;

1499 } else {

1500 if (isFixed)

1501 SVEAreaOffset = SVEStackSize;

1502 else if (isCSR)

1503 SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;

1504 }

1505 } else {

1506 if (UseFP && !(isFixed || isCSR))

1507 SVEAreaOffset = -SVEStackSize;

1508 if (!UseFP && (isFixed || isCSR))

1509 SVEAreaOffset = SVEStackSize;

1510 }

1511

1512 if (UseFP) {

1513 FrameReg = RegInfo->getFrameRegister(MF);

1515 }

1516

1517

1518 if (RegInfo->hasBasePointer(MF))

1519 FrameReg = RegInfo->getBaseRegister();

1520 else {

1521 assert(!MFI.hasVarSizedObjects() &&

1522 "Can't use SP when we have var sized objects.");

1523 FrameReg = AArch64::SP;

1524

1525

1526

1529 }

1530

1532}

1533

1535

1536

1537

1538

1539

1542}

1543

1551 Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&

1554}

1555

1557 unsigned SpillCount, unsigned Reg1,

1558 unsigned Reg2, bool NeedsWinCFI,

1559 bool IsFirst,

1561

1562

1563

1564

1565

1566

1567

1568 if (Reg2 == AArch64::FP)

1569 return true;

1570 if (!NeedsWinCFI)

1571 return false;

1572

1573

1574

1575

1576

1577

1578 if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)

1579 return SpillExtendedVolatile

1580 ? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||

1581 (SpillCount % 2) == 0)

1582 : false;

1583

1584

1585

1586

1587

1588

1589 if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&

1590 (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)

1591 return false;

1592 return true;

1593}

1594

1595

1596

1597

1598

1600 unsigned SpillCount, unsigned Reg1,

1601 unsigned Reg2, bool UsesWinAAPCS,

1602 bool NeedsWinCFI, bool NeedsFrameRecord,

1603 bool IsFirst,

1605 if (UsesWinAAPCS)

1607 Reg1, Reg2, NeedsWinCFI, IsFirst,

1609

1610

1611

1612 if (NeedsFrameRecord)

1613 return Reg2 == AArch64::LR;

1614

1615 return false;

1616}

1617

namespace {

struct RegPairInfo {
  Register Reg1;
  Register Reg2;
  int FrameIdx;
  int Offset;
  enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
  const TargetRegisterClass *RC;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2.isValid(); }

  bool isScalable() const { return Type == PPR || Type == ZPR; }
};

} // end anonymous namespace

1636

1638 for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {

1639 if (SavedRegs.test(PReg)) {

1640 unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;

1642 }

1643 }

1645}

1646

1647

1651 return false;

1652

1654 bool IsLocallyStreaming =

1656

1657

1658

1659

1660 return Subtarget.hasSVE2p1() ||

1661 (Subtarget.hasSME2() &&

1662 (!IsLocallyStreaming && Subtarget.isStreaming()));

1663}

1664

1670 bool NeedsFrameRecord) {

1671

1672 if (CSI.empty())

1673 return;

1674

1682 (void)CC;

1683

1684

1688 (Count & 1) == 0) &&

1689 "Odd number of callee-saved regs to spill!");

1691 int StackFillDir = -1;

1692 int RegInc = 1;

1693 unsigned FirstReg = 0;

1694 if (NeedsWinCFI) {

1695

1696 ByteOffset = 0;

1697 StackFillDir = 1;

1698

1699

1700 RegInc = -1;

1701 FirstReg = Count - 1;

1702 }

1703

1705

1706

1707

1708

1709

1710

1711

1712

1713 bool SpillExtendedVolatile =

1715 const auto &Reg = CSI.getReg();

1716 return Reg >= AArch64::X0 && Reg <= AArch64::X18;

1717 });

1718

1719 int ZPRByteOffset = 0;

1720 int PPRByteOffset = 0;

1722 if (SplitPPRs) {

1725 } else if (!FPAfterSVECalleeSaves) {

1726 ZPRByteOffset =

1728

1729 PPRByteOffset = 0;

1730 }

1731

1735

1736

1737 for (unsigned i = FirstReg; i < Count; i += RegInc) {

1738 RegPairInfo RPI;

1739 RPI.Reg1 = CSI[i].getReg();

1740

1741 if (AArch64::GPR64RegClass.contains(RPI.Reg1)) {

1742 RPI.Type = RegPairInfo::GPR;

1743 RPI.RC = &AArch64::GPR64RegClass;

1744 } else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) {

1745 RPI.Type = RegPairInfo::FPR64;

1746 RPI.RC = &AArch64::FPR64RegClass;

1747 } else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) {

1748 RPI.Type = RegPairInfo::FPR128;

1749 RPI.RC = &AArch64::FPR128RegClass;

1750 } else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) {

1751 RPI.Type = RegPairInfo::ZPR;

1752 RPI.RC = &AArch64::ZPRRegClass;

1753 } else if (AArch64::PPRRegClass.contains(RPI.Reg1)) {

1754 RPI.Type = RegPairInfo::PPR;

1755 RPI.RC = &AArch64::PPRRegClass;

1756 } else if (RPI.Reg1 == AArch64::VG) {

1757 RPI.Type = RegPairInfo::VG;

1758 RPI.RC = &AArch64::FIXED_REGSRegClass;

1759 } else {

1761 }

1762

1763 int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs

1764 ? PPRByteOffset

1765 : ZPRByteOffset;

1766

1767

1768 if (HasCSHazardPadding &&

1771 ByteOffset += StackFillDir * StackHazardSize;

1772 LastReg = RPI.Reg1;

1773

1774 int Scale = TRI->getSpillSize(*RPI.RC);

1775

1776 if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {

1777 MCRegister NextReg = CSI[i + RegInc].getReg();

1778 bool IsFirst = i == FirstReg;

1779 unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;

1780 switch (RPI.Type) {

1781 case RegPairInfo::GPR:

1782 if (AArch64::GPR64RegClass.contains(NextReg) &&

1784 SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,

1785 NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))

1786 RPI.Reg2 = NextReg;

1787 break;

1788 case RegPairInfo::FPR64:

1789 if (AArch64::FPR64RegClass.contains(NextReg) &&

1791 RPI.Reg1, NextReg, NeedsWinCFI,

1792 IsFirst, TRI))

1793 RPI.Reg2 = NextReg;

1794 break;

1795 case RegPairInfo::FPR128:

1796 if (AArch64::FPR128RegClass.contains(NextReg))

1797 RPI.Reg2 = NextReg;

1798 break;

1799 case RegPairInfo::PPR:

1800 break;

1801 case RegPairInfo::ZPR:

1803 ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {

1804

1805

1806 int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;

1808 RPI.Reg2 = NextReg;

1809 }

1810 break;

1811 case RegPairInfo::VG:

1812 break;

1813 }

1814 }

1815

1816

1817

1818

1819

1820

1821

1822 assert((!RPI.isPaired() ||

1823 (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&

1824 "Out of order callee saved regs!");

1825

1826 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||

1827 RPI.Reg1 == AArch64::LR) &&

1828 "FrameRecord must be allocated together with LR");

1829

1830

1831 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||

1832 RPI.Reg2 == AArch64::LR) &&

1833 "FrameRecord must be allocated together with LR");

1834

1835

1836

1840 (RPI.isPaired() &&

1841 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||

1842 RPI.Reg1 + 1 == RPI.Reg2))) &&

1843 "Callee-save registers not saved as adjacent register pair!");

1844

1845 RPI.FrameIdx = CSI[i].getFrameIdx();

1846 if (NeedsWinCFI &&

1847 RPI.isPaired())

1848 RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

1849

1850

1851

1852 if (RPI.isScalable() && ScalableByteOffset % Scale != 0) {

1853 ScalableByteOffset = alignTo(ScalableByteOffset, Scale);

1854 }

1855

1856 int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;

1857 assert(OffsetPre % Scale == 0);

1858

1859 if (RPI.isScalable())

1860 ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

1861 else

1862 ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

1863

1864

1865

1867 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||

1868 (IsWindows && RPI.Reg2 == AArch64::LR)))

1869 ByteOffset += StackFillDir * 8;

1870

1871

1872

1873 if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&

1874 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&

1875 ByteOffset % 16 != 0) {

1876 ByteOffset += 8 * StackFillDir;

1878

1879

1880

1882 NeedGapToAlignStack = false;

1883 }

1884

1885 int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;

1886 assert(OffsetPost % Scale == 0);

1887

1888

1889 int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

1890

1891

1892

1894 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||

1895 (IsWindows && RPI.Reg2 == AArch64::LR)))

1897 RPI.Offset = Offset / Scale;

1898

1899 assert((!RPI.isPaired() ||

1900 (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||

1901 (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&

1902 "Offset out of bounds for LDP/STP immediate");

1903

1904 auto isFrameRecord = [&] {

1905 if (RPI.isPaired())

1906 return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR

1907 : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;

1908

1909

1910

1911

1912

1913

1914

1915 return i > 0 && RPI.Reg1 == AArch64::FP &&

1916 CSI[i - 1].getReg() == AArch64::LR;

1917 };

1918

1919

1920

1921 if (NeedsFrameRecord && isFrameRecord())

1923

1925 if (RPI.isPaired())

1926 i += RegInc;

1927 }

1928 if (NeedsWinCFI) {

1929

1930

1931

1932

1933

1936

1937

1938 std::reverse(RegPairs.begin(), RegPairs.end());

1939 }

1940}

1941

1951

1953

1955

1956

1957 MRI.freezeReservedRegs();

1958

1959 if (homogeneousPrologEpilog(MF)) {

1962

1963 for (auto &RPI : RegPairs) {

1964 MIB.addReg(RPI.Reg1);

1965 MIB.addReg(RPI.Reg2);

1966

1967

1968 if (MRI.isReserved(RPI.Reg1))

1969 MBB.addLiveIn(RPI.Reg1);

1970 if (RPI.isPaired() && MRI.isReserved(RPI.Reg2))

1971 MBB.addLiveIn(RPI.Reg2);

1972 }

1973 return true;

1974 }

1975 bool PTrueCreated = false;

1976 for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {

1979 unsigned StrOpc;

1980

1981

1982

1983

1984

1985

1986

1987

1988

1989

1990

    unsigned Size = TRI->getSpillSize(*RPI.RC);
    Align Alignment = TRI->getSpillAlign(*RPI.RC);
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      break;
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      break;
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      break;
    case RegPairInfo::ZPR:
      StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
      break;
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      break;
    case RegPairInfo::VG:
      StrOpc = AArch64::STRXui;
      break;
    }

2013

2016 if (X0Scratch != AArch64::NoRegister)

2020 });

2021

2022 if (Reg1 == AArch64::VG) {

2023

2024 Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);

2025 assert(Reg1 != AArch64::NoRegister);

2031 } else {

2035 return STI.getRegisterInfo()->isSuperOrSubRegisterEq(

2036 AArch64::X0, LiveIn.PhysReg);

2037 })) {

2038 X0Scratch = Reg1;

2040 .addReg(AArch64::X0)

2042 }

2043

2044 RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;

2046 TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));

2052 Reg1 = AArch64::X0;

2053 }

2054 }

2055

2058 if (RPI.isPaired())

2060 dbgs() << ") -> fi#(" << RPI.FrameIdx;

2061 if (RPI.isPaired())

2062 dbgs() << ", " << RPI.FrameIdx + 1;

2063 dbgs() << ")\n";

2064 });

2065

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");

2068

2069

2070

2071 unsigned FrameIdxReg1 = RPI.FrameIdx;

2072 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;

2073 if (NeedsWinCFI && RPI.isPaired()) {

2075 std::swap(FrameIdxReg1, FrameIdxReg2);

2076 }

2077

2078 if (RPI.isPaired() && RPI.isScalable()) {

2084 "Expects SVE2.1 or SME2 target and a predicate register");

2085#ifdef EXPENSIVE_CHECKS

      auto IsPPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::PPR;
      };

2089 auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);

2090 auto IsZPR = [](const RegPairInfo &c) {

2091 return c.Type == RegPairInfo::ZPR;

2092 };

2093 auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);

2094 assert(!(PPRBegin < ZPRBegin) &&

2095 "Expected callee save predicate to be handled first");

2096#endif

2097 if (!PTrueCreated) {

2098 PTrueCreated = true;

2101 }

2103 if (MRI.isReserved(Reg1))

2104 MBB.addLiveIn(Reg1);

2105 if (MRI.isReserved(Reg2))

2106 MBB.addLiveIn(Reg2);

      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));

2112 MIB.addReg(AArch64::SP)

2113 .addImm(RPI.Offset / 2)

2114

2119 if (NeedsWinCFI)

2121 } else {

2123 if (MRI.isReserved(Reg1))

2124 MBB.addLiveIn(Reg1);

2125 if (RPI.isPaired()) {

2126 if (MRI.isReserved(Reg2))

2127 MBB.addLiveIn(Reg2);

2132 }

2134 .addReg(AArch64::SP)

2135 .addImm(RPI.Offset)

2136

2141 if (NeedsWinCFI)

2143 }

2144

2146 if (RPI.Type == RegPairInfo::ZPR) {

2148 if (RPI.isPaired())

2150 } else if (RPI.Type == RegPairInfo::PPR) {

2152 if (RPI.isPaired())

2154 }

2155 }

2156 return true;

2157}

2158

2167

2169 DL = MBBI->getDebugLoc();

2170

2172 if (homogeneousPrologEpilog(MF, &MBB)) {

2175 for (auto &RPI : RegPairs) {

2178 }

2179 return true;

2180 }

2181

2182

2183 auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };

2185 auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);

2186 std::reverse(PPRBegin, PPREnd);

2187 auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };

2189 auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);

2190 std::reverse(ZPRBegin, ZPREnd);

2191

2192 bool PTrueCreated = false;

2193 for (const RegPairInfo &RPI : RegPairs) {

2196

2197

2198

2199

2200

2201

2202

2203

2204

    unsigned LdrOpc;
    unsigned Size = TRI->getSpillSize(*RPI.RC);
    Align Alignment = TRI->getSpillAlign(*RPI.RC);
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      break;
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      break;
    case RegPairInfo::VG:
      continue;
    }

2229 if (RPI.isPaired())

2231 dbgs() << ") -> fi#(" << RPI.FrameIdx;

2232 if (RPI.isPaired())

2233 dbgs() << ", " << RPI.FrameIdx + 1;

2234 dbgs() << ")\n";

2235 });

2236

2237

2238

2239

2240 unsigned FrameIdxReg1 = RPI.FrameIdx;

2241 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;

2242 if (NeedsWinCFI && RPI.isPaired()) {

2244 std::swap(FrameIdxReg1, FrameIdxReg2);

2245 }

2246

2248 if (RPI.isPaired() && RPI.isScalable()) {

2253 "Expects SVE2.1 or SME2 target and a predicate register");

2254#ifdef EXPENSIVE_CHECKS

2255 assert(!(PPRBegin < ZPRBegin) &&

2256 "Expected callee save predicate to be handled first");

2257#endif

2258 if (!PTrueCreated) {

2259 PTrueCreated = true;

2262 }

      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),

2270 MIB.addReg(AArch64::SP)

2271 .addImm(RPI.Offset / 2)

2272

2277 if (NeedsWinCFI)

2279 } else {

2281 if (RPI.isPaired()) {

2286 }

2288 MIB.addReg(AArch64::SP)

2289 .addImm(RPI.Offset)

2290

2295 if (NeedsWinCFI)

2297 }

2298 }

2299 return true;

2300}

2301

2302

  auto *PSV =
      dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
  if (PSV)
    return std::optional<int>(PSV->getFrameIndex());

2309

2313 FI++)

2315 return FI;

2316 }

2317 }

2318

2319 return std::nullopt;

2320}

2321

2322

  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)

2326 return std::nullopt;

2327

2329}

2330

2331

2333 return AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());

2334}

2335

2336

2337

2338

2339void AArch64FrameLowering::determineStackHazardSlot(

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();

2343 if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||

2345 return;

2346

2347

2350 return;

2351

2353

2354

2355

  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg);
  });
  bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::PPRRegClass.contains(Reg);
  });
  bool HasFPRStackObjects = false;
  bool HasPPRStackObjects = false;

2367 enum SlotType : uint8_t {

2369 ZPRorFPR = 1 << 0,

2370 PPR = 1 << 1,

2371 GPR = 1 << 2,

2373 };

2374

2375

2376

2378 for (auto &MBB : MF) {

2379 for (auto &MI : MBB) {

2381 if (!FI || FI < 0 || FI > int(SlotTypes.size()))

2382 continue;

2384 SlotTypes[*FI] |=

2385 isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;

2386 } else {

2388 ? SlotType::ZPRorFPR

2389 : SlotType::GPR;

2390 }

2391 }

2392 }

2393

2394 for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {

2395 HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;

2396

2397

2398 if (SlotTypes[FI] == SlotType::PPR) {

2400 HasPPRStackObjects = true;

2401 }

2402 }

2403 }

2404

2405 if (HasFPRCSRs || HasFPRStackObjects) {

2407 LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "

2408 << StackHazardSize << "\n");

2410 }

2411

2413 return;

2414

2419 LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n");

2420 return;

2421 }

2422

2423

2424

2425 LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in "

2426 "non-SVE CC function...\n");

2427

2428

2429

2433 << "Calling convention is not supported with SplitSVEObjects\n");

2434 return;

2435 }

2436

2437 if (!HasPPRCSRs && !HasPPRStackObjects) {

2439 dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");

2440 return;

2441 }

2442

2443 if (!HasFPRCSRs && !HasFPRStackObjects) {

2446 << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");

2447 return;

2448 }

2449

  [[maybe_unused]] const AArch64Subtarget &Subtarget =
      MF.getSubtarget<AArch64Subtarget>();

2453 "Expected SVE to be available for PPRs");

2454

2455 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

2456

2457

2458

2459 BitVector FPRZRegs(SavedRegs.size());

2460 for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {

2461 BitVector::reference RegBit = SavedRegs[Reg];

2462 if (!RegBit)

2463 continue;

2464 unsigned SubRegIdx = 0;

2465 if (AArch64::FPR64RegClass.contains(Reg))

2466 SubRegIdx = AArch64::dsub;

2467 else if (AArch64::FPR128RegClass.contains(Reg))

2468 SubRegIdx = AArch64::zsub;

2469 else

2470 continue;

2471

2472 RegBit = false;

2473

2475 TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);

2476 FPRZRegs.set(ZReg);

2477 }

2478 SavedRegs |= FPRZRegs;

2479

2482 }

2483}

2484

2488

2489

2491 return;

2492

2494

2498 unsigned UnspilledCSGPR = AArch64::NoRegister;

2499 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

2500

2503

2505 RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : MCRegister();

2506

2507 unsigned ExtraCSSpill = 0;

2508 bool HasUnpairedGPR64 = false;

2509 bool HasPairZReg = false;

2510 BitVector UserReservedRegs = RegInfo->getUserReservedRegs(MF);

2511 BitVector ReservedRegs = RegInfo->getReservedRegs(MF);

2512

2513

2514 for (unsigned i = 0; CSRegs[i]; ++i) {

2516

2517

2518 if (Reg == BasePointerReg)

2519 SavedRegs.set(Reg);

2520

2521

2522

2523 if (UserReservedRegs[Reg]) {

2524 SavedRegs.reset(Reg);

2525 continue;

2526 }

2527

2528 bool RegUsed = SavedRegs.test(Reg);

2530 const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);

2531 if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||

2532 AArch64::FPR128RegClass.contains(Reg)) {

2533

2534

2535 if (HasUnpairedGPR64)

2536 PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];

2537 else

2538 PairedReg = CSRegs[i ^ 1];

2539 }

2540

2541

2542

2543

2544

2545 if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {

2546 PairedReg = AArch64::NoRegister;

2547 HasUnpairedGPR64 = true;

2548 }

2549 assert(PairedReg == AArch64::NoRegister ||

2550 AArch64::GPR64RegClass.contains(Reg, PairedReg) ||

2551 AArch64::FPR64RegClass.contains(Reg, PairedReg) ||

2552 AArch64::FPR128RegClass.contains(Reg, PairedReg));

2553

2554 if (!RegUsed) {

2555 if (AArch64::GPR64RegClass.contains(Reg) && !ReservedRegs[Reg]) {

2556 UnspilledCSGPR = Reg;

2557 UnspilledCSGPRPaired = PairedReg;

2558 }

2559 continue;

2560 }

2561

2562

2563

2564

2565 if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&

2566 !SavedRegs.test(PairedReg)) {

2567 SavedRegs.set(PairedReg);

2568 if (AArch64::GPR64RegClass.contains(PairedReg) &&

2569 !ReservedRegs[PairedReg])

2570 ExtraCSSpill = PairedReg;

2571 }

2572

2573 HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&

2574 SavedRegs.test(CSRegs[i ^ 1]));

2575 }

2576

2579

2580

2582 if (PnReg.isValid())

2584

2588 SavedRegs.set(AArch64::P8);

2590 }

2591

2593 "Predicate cannot be a reserved register");

2594 }

2595

2598

2599

2600

2601

2602

2603 SavedRegs.set(AArch64::X18);

2604 }

2605

2606

2607

2608

2609 determineStackHazardSlot(MF, SavedRegs);

2610

2611

2612 unsigned CSStackSize = 0;

2613 unsigned ZPRCSStackSize = 0;

2614 unsigned PPRCSStackSize = 0;

2616 for (unsigned Reg : SavedRegs.set_bits()) {

2617 auto *RC = TRI->getMinimalPhysRegClass(MCRegister(Reg));

2618 assert(RC && "expected register class!");

2619 auto SpillSize = TRI->getSpillSize(*RC);

2620 bool IsZPR = AArch64::ZPRRegClass.contains(Reg);

2621 bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);

2622 if (IsZPR)

2623 ZPRCSStackSize += SpillSize;

2624 else if (IsPPR)

2625 PPRCSStackSize += SpillSize;

2626 else

2627 CSStackSize += SpillSize;

2628 }

2629

2630

2631

2632

2633 unsigned NumSavedRegs = SavedRegs.count();

2634

2635

2638

2639

2640

2642 CSStackSize += 8;

2643

2644

2646 SavedRegs.set(AArch64::LR);

2647

2648

2650 if (hasFP(MF) ||

2651 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {

2652 SavedRegs.set(AArch64::FP);

2653 SavedRegs.set(AArch64::LR);

2654 }

2655

2657 dbgs() << "*** determineCalleeSaves\nSaved CSRs:";

2658 for (unsigned Reg : SavedRegs.set_bits())

2660 dbgs() << "\n";

2661 });

2662

2663

2664 auto [ZPRLocalStackSize, PPRLocalStackSize] =

2666 uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;

2668 alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);

2669 bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

2670

2671

2672

2674

2675

2676

2677

2678 int64_t CalleeStackUsed = 0;

2681 if (FixedOff > CalleeStackUsed)

2682 CalleeStackUsed = FixedOff;

2683 }

2684

2685

2686 bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +

2687 CalleeStackUsed) > EstimatedStackSizeLimit;

2688 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))

2690

2691

2692

2693

2694

2695

2696

2697 if (BigStack) {

2698 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {

2700 << " to get a scratch register.\n");

2701 SavedRegs.set(UnspilledCSGPR);

2702 ExtraCSSpill = UnspilledCSGPR;

2703

2704

2705

2706

2707 if (producePairRegisters(MF)) {

2708 if (UnspilledCSGPRPaired == AArch64::NoRegister) {

2709

2711 SavedRegs.reset(UnspilledCSGPR);

2712 ExtraCSSpill = AArch64::NoRegister;

2713 }

2714 } else

2715 SavedRegs.set(UnspilledCSGPRPaired);

2716 }

2717 }

2718

2719

2720

2724 unsigned Size = TRI->getSpillSize(RC);

2725 Align Alignment = TRI->getSpillAlign(RC);

2727 RS->addScavengingFrameIndex(FI);

2728 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI

2729 << " as the emergency spill slot.\n");

2730 }

2731 }

2732

2733

2734 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);

2735

2736

2737

2739 CSStackSize += 8;

2740

2743 << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");

2744

2747 "Should not invalidate callee saved info");

2748

2749

2750

2754}

2755

    std::vector<CalleeSavedInfo> &CSI) const {

2761

2762

2763

2764

2765

2766 if (NeedsWinCFI)

2767 std::reverse(CSI.begin(), CSI.end());

2768

2769 if (CSI.empty())

2770 return true;

2771

2772

2773

2776

2782 }

2783

2784

2787 auto It =

2788 find_if(CSI, [](auto &Info) { return Info.getReg() == AArch64::LR; });

2789 if (It != CSI.end())

2790 CSI.insert(It, VGInfo);

2791 else

2792 CSI.push_back(VGInfo);

2793 }

2794

2796 int HazardSlotIndex = std::numeric_limits::max();

2797 for (auto &CS : CSI) {

2800

2801

2805 assert(HazardSlotIndex == std::numeric_limits::max() &&

2806 "Unexpected register order for hazard slot");

2808 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex

2809 << "\n");

2812 }

2813

2814 unsigned Size = RegInfo->getSpillSize(*RC);

2815 Align Alignment(RegInfo->getSpillAlign(*RC));

2817 CS.setFrameIdx(FrameIdx);

2819

2820

2822 Reg == AArch64::FP) {

2826 }

2827 LastReg = Reg;

2828 }

2829

2830

2832 HazardSlotIndex == std::numeric_limits::max()) {

2834 LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex

2835 << "\n");

2838 }

2839

2840 return true;

2841}

2842

2846

2847

2848

2849

2851 return false;

2852

2853

2855 return false;

2857}

2858

2859

2861 int &Min, int &Max) {

2862 Min = std::numeric_limits::max();

2863 Max = std::numeric_limits::min();

2864

2866 return false;

2867

2869 for (auto &CS : CSI) {

2870 if (AArch64::ZPRRegClass.contains(CS.getReg()) ||

2871 AArch64::PPRRegClass.contains(CS.getReg())) {

2872 assert((Max == std::numeric_limits::min() ||

2873 Max + 1 == CS.getFrameIdx()) &&

2874 "SVE CalleeSaves are not consecutive");

2875 Min = std::min(Min, CS.getFrameIdx());

2876 Max = std::max(Max, CS.getFrameIdx());

2877 }

2878 }

2879 return Min != std::numeric_limits::max();

2880}

2881

2886

2888

2889

2890

2891

2892 uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;

2894 AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;

2895

2896#ifndef NDEBUG

2897

2900 "SVE vectors should never be passed on the stack by value, only by "

2901 "reference.");

2902#endif

2903

2904 auto AllocateObject = [&](int FI) {

2906 ? ZPRStackTop

2907 : PPRStackTop;

2908

2909

2910

2911

2913 if (Alignment > Align(16))

2915 "Alignment of scalable vectors > 16 bytes is not yet supported");

2916

2918 StackTop = alignTo(StackTop, Alignment);

2919

2920 assert(StackTop < (uint64_t)std::numeric_limits<int64_t>::max() &&

2921 "SVE StackTop far too large?!");

2922

2923 int64_t Offset = -int64_t(StackTop);

2926

2928 };

2929

2930

2931 int MinCSFrameIndex, MaxCSFrameIndex;

2933 for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)

2934 AllocateObject(FI);

2935 }

2936

2937

2938 PPRStackTop = alignTo(PPRStackTop, Align(16U));

2939 ZPRStackTop = alignTo(ZPRStackTop, Align(16U));

2940

2941

2943

2944

2945

2946 int StackProtectorFI = -1;

2950 ObjectsToAllocate.push_back(StackProtectorFI);

2951 }

2952

2956 continue;

2957

2960 continue;

2961

2962 ObjectsToAllocate.push_back(FI);

2963 }

2964

2965

2966 for (unsigned FI : ObjectsToAllocate)

2967 AllocateObject(FI);

2968

2969 PPRStackTop = alignTo(PPRStackTop, Align(16U));

2970 ZPRStackTop = alignTo(ZPRStackTop, Align(16U));

2971

2973 AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);

2974

2975 return SVEStack;

2976}

2977

2981 "Upwards growing stack unsupported");

2982

2984

2985

2986

2988 return;

2989

2992

2993

2994

2996 int64_t CurrentOffset =

3000 int FrameIndex = H.CatchObj.FrameIndex;

3001 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {

3002 CurrentOffset =

3006 }

3007 }

3008 }

3009

3010

3011

3012 int64_t UnwindHelpOffset = alignTo(CurrentOffset + 8, Align(16));

3013 assert(UnwindHelpOffset == getFixedObjectSize(MF, AFI, true,

3014 false) &&

3015 "UnwindHelpOffset must be at the start of the fixed object area");

  int UnwindHelpFI = MFI.CreateFixedObject(/*Size=*/8, -UnwindHelpOffset,
                                           /*IsImmutable=*/false);

3018 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

3019

3021 auto MBBI = MBB.begin();

3024

3025

3026

3028 RS->enterBasicBlockEnd(MBB);

3029 RS->backward(MBBI);

3030 Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);

3031 assert(DstReg && "There must be a free register after frame setup");

3038}

3039

3040namespace {

struct TagStoreInstr {
  MachineInstr *MI;
  int64_t Offset, Size;
  explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
      : MI(MI), Offset(Offset), Size(Size) {}
};

3047

3048class TagStoreEdit {

3049 MachineFunction *MF;

3050 MachineBasicBlock *MBB;

3051 MachineRegisterInfo *MRI;

3052

3054

3056

3057

3058

3060 StackOffset FrameRegOffset;

3061 int64_t Size;

3062

3063

3064 std::optional<int64_t> FrameRegUpdate;

3065

3066 unsigned FrameRegUpdateFlags;

3067

3068

3069 bool ZeroData;

3071

3074

3075public:

3076 TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)

3077 : MBB(MBB), ZeroData(ZeroData) {

3080 }

3081

3082

3083 void addInstruction(TagStoreInstr I) {

3085 TagStores.back().Offset + TagStores.back().Size == I.Offset) &&

3086 "Non-adjacent tag store instructions.");

3088 }

3089 void clear() { TagStores.clear(); }

3090

3091

3092

3094 const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);

3095};

3096

3100

3101 const int64_t kMinOffset = -256 * 16;

3102 const int64_t kMaxOffset = 255 * 16;

3103

3105 int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();

3106 if (BaseRegOffsetBytes < kMinOffset ||

3107 BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||

3108

3109

3110

3111 BaseRegOffsetBytes % 16 != 0) {

3112 Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

3116 BaseRegOffsetBytes = 0;

3117 }

3118

3120 while (Size) {

3121 int64_t InstrSize = (Size > 16) ? 32 : 16;

3122 unsigned Opcode =

3123 InstrSize == 16

3124 ? (ZeroData ? AArch64::STZGi : AArch64::STGi)

3126 assert(BaseRegOffsetBytes % 16 == 0);

3128 .addReg(AArch64::SP)

3130 .addImm(BaseRegOffsetBytes / 16)

3132

3133

3134 if (BaseRegOffsetBytes == 0)

3135 LastI = I;

3136 BaseRegOffsetBytes += InstrSize;

3137 Size -= InstrSize;

3138 }

3139

3140 if (LastI)

3142}

3143

3147

3149 ? FrameReg

3150 : MRI->createVirtualRegister(&AArch64::GPR64RegClass);

3151 Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

3152

3154

3155 int64_t LoopSize = Size;

3156

3157

3158 if (FrameRegUpdate && *FrameRegUpdate)

3159 LoopSize -= LoopSize % 32;

3161 TII->get(ZeroData ? AArch64::STZGloop_wback

3162 : AArch64::STGloop_wback))

3168 if (FrameRegUpdate)

3169 LoopI->setFlags(FrameRegUpdateFlags);

3170

3171 int64_t ExtraBaseRegUpdate =

3172 FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;

3173 LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize

3174 << ", Size=" << Size

3175 << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate

3176 << ", FrameRegUpdate=" << FrameRegUpdate

3177 << ", FrameRegOffset.getFixed()="

3178 << FrameRegOffset.getFixed() << "\n");

3179 if (LoopSize < Size) {

3180 assert(FrameRegUpdate);

3182

3183 int64_t STGOffset = ExtraBaseRegUpdate + 16;

3184 assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&

3185 "STG immediate out of range");

3187 TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))

3191 .addImm(STGOffset / 16)

3194 } else if (ExtraBaseRegUpdate) {

3195

3196 int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);

3197 assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");

3199 *MBB, InsertI, DL,

3200 TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))

3203 .addImm(AddSubOffset)

3206 }

3207}

3208

3209

3210

3211

3213 int64_t Size, int64_t *TotalOffset) {

3215 if ((MI.getOpcode() == AArch64::ADDXri ||

3216 MI.getOpcode() == AArch64::SUBXri) &&

3217 MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {

3219 int64_t Offset = MI.getOperand(2).getImm() << Shift;

3220 if (MI.getOpcode() == AArch64::SUBXri)

3223

3224

3225

3226

3227

3228

3229

3230

3231 const int64_t kMaxOffset = 4080 - 16;

3232

3233 const int64_t kMinOffset = -4095;

3234 if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&

3235 PostOffset % 16 == 0) {

3236 *TotalOffset = Offset;

3237 return true;

3238 }

3239 }

3240 return false;

3241}

3242

3245 MemRefs.clear();

3246 for (auto &TS : TSE) {

3248

3249

3250 if (MI->memoperands_empty()) {

3251 MemRefs.clear();

3252 return;

3253 }

3254 MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());

3255 }

3256}

3257

3260 bool TryMergeSPUpdate) {

3261 if (TagStores.empty())

3262 return;

3263 TagStoreInstr &FirstTagStore = TagStores[0];

3264 TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];

3265 Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;

3266 DL = TagStores[0].MI->getDebugLoc();

3267

3270 *MF, FirstTagStore.Offset, false ,

3272 false, true);

3273 FrameReg = Reg;

3274 FrameRegUpdate = std::nullopt;

3275

3276 mergeMemRefs(TagStores, CombinedMemRefs);

3277

3279 dbgs() << "Replacing adjacent STG instructions:\n";

3280 for (const auto &Instr : TagStores) {

3282 }

3283 });

3284

3285

3286

3289 if (TagStores.size() < 2)

3290 return;

3291 emitUnrolled(InsertI);

3292 } else {

3294 int64_t TotalOffset = 0;

3295 if (TryMergeSPUpdate) {

3296

3297

3298

3299

3300

3301 if (InsertI != MBB->end() &&

3302 canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,

3303 &TotalOffset)) {

3304 UpdateInstr = &*InsertI++;

3305 LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "

3306 << *UpdateInstr);

3307 }

3308 }

3309

3310 if (!UpdateInstr && TagStores.size() < 2)

3311 return;

3312

3313 if (UpdateInstr) {

3314 FrameRegUpdate = TotalOffset;

3315 FrameRegUpdateFlags = UpdateInstr->getFlags();

3316 }

3317 emitLoop(InsertI);

3318 if (UpdateInstr)

3320 }

3321

3322 for (auto &TS : TagStores)

3323 TS.MI->eraseFromParent();

3324}

3325

3327 int64_t &Size, bool &ZeroData) {

3330

3331 unsigned Opcode = MI.getOpcode();

3332 ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||

3333 Opcode == AArch64::STZ2Gi);

3334

3335 if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {

    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
      return false;
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
      return false;

3341 Size = MI.getOperand(2).getImm();

3342 return true;

3343 }

3344

3345 if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)

3347 else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)

3349 else

3350 return false;

3351

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())

3353 return false;

3354

3356 16 * MI.getOperand(2).getImm();

3357 return true;

3358}

3359

3360

3361

3362

3363

3364

3365

3369 bool FirstZeroData;

3375 return II;

3376 if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))

3377 return II;

3378

3381

3382 constexpr int kScanLimit = 10;

3385 NextI != E && Count < kScanLimit; ++NextI) {

3387 bool ZeroData;

3389

3390

3391

3392

3393

3394 if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {

3395 if (ZeroData != FirstZeroData)

3396 break;

3398 continue;

3399 }

3400

3401

3402

3403 if (MI.isTransient())

3405

3406

3409 break;

3410

3411

3412 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() || MI.isCall())

3413 break;

3414 }

3415

3416

3418

3419

3420

3421

3422

3423

3424

3425

3426

3428 LiveRegs.addLiveOuts(*MBB);

3431 if (MI == InsertI)

3432 break;

3433 LiveRegs.stepBackward(*I);

3434 }

3435 InsertI++;

3436 if (LiveRegs.contains(AArch64::NZCV))

3437 return InsertI;

3438

3440 [](const TagStoreInstr &Left, const TagStoreInstr &Right) {

3441 return Left.Offset < Right.Offset;

3442 });

3443

3444

3445 int64_t CurOffset = Instrs[0].Offset;

3446 for (auto &Instr : Instrs) {

3447 if (CurOffset > Instr.Offset)

3448 return NextI;

3449 CurOffset = Instr.Offset + Instr.Size;

3450 }

3451

3452

3453

3454 TagStoreEdit TSE(MBB, FirstZeroData);

3455 std::optional<int64_t> EndOffset;

3456 for (auto &Instr : Instrs) {

3457 if (EndOffset && *EndOffset != Instr.Offset) {

3458

3459 TSE.emitCode(InsertI, TFI, false);

3460 TSE.clear();

3461 }

3462

3463 TSE.addInstruction(Instr);

3464 EndOffset = Instr.Offset + Instr.Size;

3465 }

3466

3468

3469 TSE.emitCode(

3470 InsertI, TFI,

3472

3473 return InsertI;

3474}

3475}

3476

3479 for (auto &BB : MF)

3482 II = tryMergeAdjacentSTG(II, this, RS);

3483 }

3484

3485

3486

3487

3489 shouldSignReturnAddressEverywhere(MF))

3491}

3492

3493

3494

3495

3498 bool IgnoreSPUpdates) const {

3500 if (IgnoreSPUpdates) {

3501 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "

3503 FrameReg = AArch64::SP;

3505 }

3506

3507

3512

3513 FrameReg = AArch64::SP;

3515}

3516

3517

3518

3523

3524

3525

3528

3529 unsigned CSSize =

3531

3534}

3535

3536namespace {

struct FrameObject {
  bool IsValid = false;
  // Index of the object in MFI.
  int ObjectIndex = 0;
  // Group ID this object belongs to.
  int GroupIndex = -1;
  // This object should be placed first (closest to SP).
  bool ObjectFirst = false;
  // This object's group (which always contains the object with
  // ObjectFirst==true) should be placed first.
  bool GroupFirst = false;

  // Used to distinguish between FP and GPR accesses. The values are decided
  // so that they sort FPR < Hazard < GPR and they can be or'd together.
  unsigned Accesses = 0;
  enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
};

3554

class GroupBuilder {
  SmallVector<int, 8> CurrentMembers;
  int NextGroupIndex = 0;
  std::vector<FrameObject> &Objects;

public:
  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
  void AddMember(int Index) { CurrentMembers.push_back(Index); }
  void EndCurrentGroup() {
    if (CurrentMembers.size() > 1) {
      // Create a new group with the current member list. This might remove
      // them from their pre-existing groups. That's OK, dealing with
      // overlapping groups is too hard and unlikely to make a difference.
      LLVM_DEBUG(dbgs() << "group:");
      for (int Index : CurrentMembers) {
        Objects[Index].GroupIndex = NextGroupIndex;
        LLVM_DEBUG(dbgs() << " " << Index);
      }
      LLVM_DEBUG(dbgs() << "\n");
      NextGroupIndex++;
    }
    CurrentMembers.clear();
  }
};

3579

bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
  return std::make_tuple(A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
                         A.GroupIndex, A.ObjectIndex) <
         std::make_tuple(B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
                         B.GroupIndex, B.ObjectIndex);
}

3607}

/// Order the symbols in the local stack frame.
void AArch64FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  if (!OrderFrameObjects || ObjectsToAllocate.empty())
    return;

  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
  for (auto &Obj : ObjectsToAllocate) {
    FrameObjects[Obj].IsValid = true;
    FrameObjects[Obj].ObjectIndex = Obj;
  }

  // Identify FPR vs GPR slots for the stack hazard mitigation, and stack
  // slots that are tagged at the same time.
  GroupBuilder GB(FrameObjects);
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;

      if (AFI.hasStackHazardSlotIndex()) {
        std::optional<int> FI = getLdStFrameID(MI, MFI);
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
          if (MFI.hasScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
            FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
          else
            FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
        }
      }

      int OpIndex;
      switch (MI.getOpcode()) {
      case AArch64::STGloop:
      case AArch64::STZGloop:
        OpIndex = 3;
        break;
      case AArch64::STGi:
      case AArch64::STZGi:
      case AArch64::ST2Gi:
      case AArch64::STZ2Gi:
        OpIndex = 1;
        break;
      default:
        OpIndex = -1;
      }

      int TaggedFI = -1;
      if (OpIndex >= 0) {
        const MachineOperand &MO = MI.getOperand(OpIndex);
        if (MO.isFI()) {
          int FI = MO.getIndex();
          if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
              FrameObjects[FI].IsValid)
            TaggedFI = FI;
        }
      }

      // If this is a stack tagging instruction for a slot that is not part of
      // a group yet, either start a new group or add it to the current one.
      if (TaggedFI >= 0)
        GB.AddMember(TaggedFI);
      else
        GB.EndCurrentGroup();
    }
    // Groups should never span multiple basic blocks.
    GB.EndCurrentGroup();
  }

  if (AFI.hasStackHazardSlotIndex()) {
    FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
        FrameObject::AccessHazard;
    // If a stack object is unknown or both GPR and FPR, sort it into GPR.
    for (auto &Obj : FrameObjects)
      if (!Obj.Accesses ||
          Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
        Obj.Accesses = FrameObject::AccessGPR;
  }

  // If the function's tagged base pointer is pinned to a stack slot, we want
  // to put that slot first when possible. This will likely place it at SP + 0,
  // and save one instruction when generating the base pointer because IRG does
  // not allow an immediate offset.
  std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
  if (TBPI) {
    FrameObjects[*TBPI].ObjectFirst = true;
    FrameObjects[*TBPI].GroupFirst = true;
    int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
    if (FirstGroupIndex >= 0)
      for (FrameObject &Object : FrameObjects)
        if (Object.GroupIndex == FirstGroupIndex)
          Object.GroupFirst = true;
  }

  llvm::stable_sort(FrameObjects, FrameObjectCompare);

  int i = 0;
  for (auto &Obj : FrameObjects) {
    // All invalid items are sorted at the end, so it's safe to stop.
    if (!Obj.IsValid)
      break;
    ObjectsToAllocate[i++] = Obj.ObjectIndex;
  }

  LLVM_DEBUG({
    dbgs() << "Final frame order:\n";
    for (auto &Obj : FrameObjects) {
      if (!Obj.IsValid)
        break;
      dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
      if (Obj.ObjectFirst)
        dbgs() << ", first";
      if (Obj.GroupFirst)
        dbgs() << ", group-first";
      dbgs() << "\n";
    }
  });
}
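// With -debug enabled, the chosen layout is printed; an illustrative run
// (object indices and groups invented for the example) might show:
//
//   Final frame order:
//    2: group 0, first, group-first
//    3: group 0, group-first
//    1: group -1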

/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
/// least every ProbeSize bytes. Returns an iterator of the first instruction
/// after the loop. The difference between SP and TargetReg must be an exact
/// multiple of ProbeSize.
MachineBasicBlock::iterator
AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
    MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
    Register TargetReg) const {
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, ExitMBB);

  // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
  // in SUB).
  emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-ProbeSize), TII,
                  MachineInstr::FrameSetup);
  // STR XZR, [SP]
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
      .addReg(AArch64::XZR)
      .addReg(AArch64::SP)
      .addImm(0)
      .setMIFlags(MachineInstr::FrameSetup);
  // CMP SP, TargetReg
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
          AArch64::XZR)
      .addReg(AArch64::SP)
      .addReg(TargetReg)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
      .setMIFlags(MachineInstr::FrameSetup);
  // B.NE Loop
  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopMBB)
      .setMIFlags(MachineInstr::FrameSetup);

  LoopMBB->addSuccessor(ExitMBB);
  LoopMBB->addSuccessor(LoopMBB);
  // Synthesize the exit MBB.
  ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(LoopMBB);
  // Update liveins.
  fullyRecomputeLiveIns({ExitMBB, LoopMBB});

  return ExitMBB->begin();
}
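// For illustration only (not part of the original source): the loop built
// above expands to roughly the following assembly, probing one block per
// iteration until SP reaches TargetReg:
//
//   LoopMBB:
//     sub  sp, sp, #ProbeSize
//     str  xzr, [sp]
//     cmp  sp, <TargetReg>, uxtx
//     b.ne LoopMBB
//   ExitMBB: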

void AArch64FrameLowering::inlineStackProbeFixed(
    MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
    StackOffset CFAOffset) const {
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  bool HasFP = hasFP(MF);

  DebugLoc DL;
  int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
  int64_t NumBlocks = FrameSize / ProbeSize;
  int64_t ResidualSize = FrameSize % ProbeSize;

  LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
                    << NumBlocks << " blocks of " << ProbeSize
                    << " bytes, plus " << ResidualSize << " bytes\n");

  // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or
  // ordinary loop.
  if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
    for (int i = 0; i < NumBlocks; ++i) {
      // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
      // in SUB).
      emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                      StackOffset::getFixed(-ProbeSize), TII,
                      MachineInstr::FrameSetup, false, false, nullptr,
                      EmitAsyncCFI && !HasFP, CFAOffset);
      CFAOffset += StackOffset::getFixed(ProbeSize);
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  } else if (NumBlocks != 0) {
    // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
    // encodable in SUB).
    emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
                    StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
    MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
    MBB = MBBI->getParent();
    if (EmitAsyncCFI && !HasFP) {
      // Set the CFA register back to SP.
      CFIInstBuilder(*MBB, MBBI, MachineInstr::FrameSetup)
          .buildDefCFARegister(AArch64::SP);
    }
  }

  if (ResidualSize != 0) {
    // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not
    // encodable in SUB).
    emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-ResidualSize), TII,
                    MachineInstr::FrameSetup, false, false, nullptr,
                    EmitAsyncCFI && !HasFP, CFAOffset);
    if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
}
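// Worked example (illustrative numbers, not from the source): with
// ProbeSize = 4096 and FrameSize = 300000, NumBlocks = 73 and
// ResidualSize = 992. If 73 exceeds StackProbeMaxLoopUnroll, a probing loop
// covers the first 73 * 4096 = 299008 bytes, and a final SUB allocates the
// remaining 992 bytes; a trailing STR XZR probe is emitted only when the
// residual exceeds StackProbeMaxUnprobedStack.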

void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  // Get the instructions that need to be replaced. We emit at most two of
  // these. Remember them in order to avoid complications coming from the need
  // to traverse the block while potentially creating more blocks.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB)
    if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
        MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
      ToReplace.push_back(&MI);

  for (MachineInstr *MI : ToReplace) {
    if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
      Register ScratchReg = MI->getOperand(0).getReg();
      int64_t FrameSize = MI->getOperand(1).getImm();
      StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
                                               MI->getOperand(3).getImm());
      inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
                            CFAOffset);
    } else {
      assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
             "Stack probe pseudo-instruction expected");
      const AArch64InstrInfo *TII =
          MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
      Register TargetReg = MI->getOperand(0).getReg();
      (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
    }
    MI->eraseFromParent();
  }
}

struct StackAccess {
  enum AccessType {
    NotAccessed = 0, // Stack object not accessed by load/store instructions.
    GPR = 1 << 0,    // A general purpose register.
    PPR = 1 << 1,    // A predicate register.
    FPR = 1 << 2,    // A floating point/Neon/SVE register.
  };

  int Idx;
  StackOffset Offset;
  int64_t Size;
  unsigned AccessTypes;

  StackAccess() : Idx(-1), Offset(), Size(0), AccessTypes(NotAccessed) {}

  bool operator<(const StackAccess &Rhs) const {
    return std::make_tuple(start(), Idx) <
           std::make_tuple(Rhs.start(), Rhs.Idx);
  }

  bool isCPU() const {
    // Predicate register load and store instructions execute on the CPU.
    return AccessTypes & (AccessType::GPR | AccessType::PPR);
  }
  bool isSME() const { return AccessTypes & AccessType::FPR; }
  bool isMixed() const { return isCPU() && isSME(); }

  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
  int64_t end() const { return start() + Size; }

  std::string getTypeString() const {
    switch (AccessTypes) {
    case AccessType::FPR:
      return "FPR";
    case AccessType::PPR:
      return "PPR";
    case AccessType::GPR:
      return "GPR";
    case AccessType::NotAccessed:
      return "NA";
    default:
      return "Mixed";
    }
  }

  void print(raw_ostream &OS) const {
    OS << getTypeString() << " stack object at [SP"
       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
    if (Offset.getScalable())
      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
         << " * vscale";
    OS << "]";
  }
};

static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}
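// Example of the resulting text (derived from print() above, values
// illustrative): a scalable vector spill below SP prints as
// "FPR stack object at [SP-16-2 * vscale]", while a plain spill slot prints
// as "GPR stack object at [SP-24]".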

void AArch64FrameLowering::emitRemarks(
    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {

  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody())
    return;

  unsigned StackHazardSize = getStackHazardSize(MF);
  const uint64_t HazardSize =
      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;

  if (HazardSize == 0)
    return;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Bail if the function has no stack objects.
  if (!MFI.hasStackObjects())
    return;

  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());

  size_t NumFPLdSt = 0;
  size_t NumNonFPLdSt = 0;

  // Collect stack accesses via Load/Store instructions.
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
        continue;
      for (MachineMemOperand *MMO : MI.memoperands()) {
        std::optional<int> FI = getMMOFrameID(MMO, MFI);
        if (FI && !MFI.isDeadObjectIndex(*FI)) {
          int FrameIdx = *FI;

          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
            StackAccesses[ArrIdx].Idx = FrameIdx;
            StackAccesses[ArrIdx].Offset =
                getFrameIndexReferenceFromSP(MF, FrameIdx);
            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
          }

          unsigned RegTy = StackAccess::AccessType::GPR;
          if (MFI.hasScalableStackID(FrameIdx))
            RegTy = isPPRAccess(MI) ? StackAccess::PPR : StackAccess::FPR;
          else if (AArch64InstrInfo::isFpOrNEON(MI))
            RegTy = StackAccess::FPR;

          StackAccesses[ArrIdx].AccessTypes |= RegTy;

          if (RegTy == StackAccess::FPR)
            ++NumFPLdSt;
          else
            ++NumNonFPLdSt;
        }
      }
    }
  }

  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
    return;

  llvm::sort(StackAccesses);
  llvm::erase_if(StackAccesses, [](const StackAccess &S) {
    return S.AccessTypes == StackAccess::NotAccessed;
  });

  SmallVector<const StackAccess *> MixedObjects;
  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;

  if (StackAccesses.front().isMixed())
    MixedObjects.push_back(&StackAccesses.front());

  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
       It != End; ++It) {
    const auto &First = *It;
    const auto &Second = *(It + 1);

    if (Second.isMixed())
      MixedObjects.push_back(&Second);

    if ((First.isSME() && Second.isCPU()) ||
        (First.isCPU() && Second.isSME())) {
      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
      if (Distance < HazardSize)
        HazardPairs.emplace_back(&First, &Second);
    }
  }

  auto EmitRemark = [&](llvm::StringRef Str) {
    ORE->emit([&]() {
      auto R = MachineOptimizationRemarkAnalysis(
          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
    });
  };

  for (const auto &P : HazardPairs)
    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());

  for (const auto *Obj : MixedObjects)
    EmitRemark(
        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
}
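// An emitted remark then reads, for example (illustrative):
//   stack hazard in 'foo': FPR stack object at [SP-48] is too close to
//   GPR stack object at [SP-40]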
