LLVM: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Source File


#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
                                   "not passed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// The LdStConstLimit limits how far we search for const offset instructions
// when we form index address load/store instructions.
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
                                        cl::init(10), cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with a
  // pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not std::nullopt, RenameReg can be used to rename the register holding
  // the value loaded/stored by the first instruction of the pair.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
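
// Illustrative sketch (not part of the original source): findMatchingInsn
// fills in an LdStPairFlags and the merge routines consume it, e.g.:
//
//   LdStPairFlags Flags;
//   auto Paired = findMatchingInsn(MBBI, Flags, LdStLimit, false);
//   if (Paired != E)
//     MBBI = mergePairedInsns(MBBI, Paired, Flags);
//
// MergeForward picks which of the two insns survives as the insertion point,
// SExtIdx marks a result that needs sign extension, and RenameReg (if set)
// is a scratch register used to break a false dependence between the pair.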


struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two instructions indicated into a wider narrow store instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can be
  // combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can be
  // combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
                                 bool &MergeEither);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  std::optional<MachineBasicBlock::iterator>
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsForward,
                  bool IsPreIdx, bool MergeEither);

  MachineBasicBlock::iterator
  mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                       MachineBasicBlock::iterator Update, unsigned Offset,
                       int Scale);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge a base register update before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  // Find and merge an index ldr/st instruction into a base ld/st instruction.
  bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};


char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)


static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}


// Returns whether the instruction is a tag store (STG and friends), which
// writes memory tags rather than ordinary data.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}


static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}


static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}


static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
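
// For example (illustrative, not from the original source), the pass turns
//
//   ldr w0, [x2]        ; LDRWui
//   ldr w1, [x2, #4]    ; LDRWui
//
// into a single paired load using the opcode returned above:
//
//   ldp w0, w1, [x2]    ; LDPWi
//
// Both the scaled (LDRWui) and unscaled (LDURWi) forms map to the same pair
// opcode, since LDP only exists with a scaled 7-bit signed immediate.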


static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}


static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such
  // an optimization in the future the cost should be checked.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}


// Returns the unsigned-offset (base address) opcode for the given
// register-offset load opcode.
static unsigned getBaseAddressOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}


static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
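
// For example (illustrative, not from the original source), the post-indexed
// form folds a following base-register increment into the memory op:
//
//   str x0, [x1]        ; STRXui
//   add x1, x1, #8
//   ; becomes
//   str x0, [x1], #8    ; STRXpost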


static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}


// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
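
// Worked example (illustrative): for LDPXi, Scale is 8 and the offset range
// is [-64, 63], so the reachable byte offsets of the pre/post-indexed form
// are [-512, 504] in steps of 8. For a plain LDRXui, the pre/post-indexed
// variants (LDRXpre/LDRXpost) are unscaled, so Scale is 1 and the byte range
// is [-256, 255].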


static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}


static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
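
// Worked example (illustrative): for
//   str w1, [x0, #4]   ; StoreSize = 4, UnscaledStOffset = 4
//   ldrh w2, [x0, #6]  ; LoadSize = 2, UnscaledLdOffset = 6
// the load reads bytes [6, 8) and the store writes bytes [4, 8), so
// 4 <= 6 and 6 + 2 <= 4 + 4 both hold and the load can be satisfied from the
// stored value (here via a bitfield extract of w1).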


static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}


static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}


static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;

    // Avoid merging a base-register update when stack hazard padding is in
    // use, so that the access and the update keep their separation.
    if (AFI.hasStackHazardSlotIndex() /* ... */)
      return false;

    return true;
  }
}


static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}


static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}


MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  int64_t IOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
  // Select final offset based on the smaller byte offset of the two stores.
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  int NewOpcode = getMatchingWideOpcode(Opc);
  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);

  // Adjust final offset if the result opcode is a scaled store.
  if (FinalIsScaled) {
    int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}


// Apply Fn to all instructions between MI and the beginning of the block,
// until a def for DefReg is reached. Returns true, iff Fn returns true for
// all visited instructions. Stop after visiting Limit iterations.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}


static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}


MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of the
    // register class C.
    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper :
           TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      }
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!MergeForward || !SeenDef ||
                   (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else {
                  if (!isRewritableImplicitDef(MI.getOpcode()))
                    continue;
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                }
                MOP.setReg(MatchingReg);
                SeenDef = true;
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                MOP.setReg(MatchingReg);
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI);
          return true;
        };
    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
                      UINT32_MAX, UpdateMIs);

#if !defined(NDEBUG)
    // For forward merging store:
    // Make sure the register used for renaming is not used between the
    // paired instructions. That would trash the content before the new
    // paired instruction.
    MCPhysReg RegToCheck = *RenameReg;
    // For backward merging load:
    // Make sure the register being renamed is not used between the
    // paired instructions. That would trash the content after the new
    // paired instruction.
    if (!MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             MergeForward ? std::next(I) : I,
             MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly.
    // Otherwise, do the opposite (i.e., make MI's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui kill %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
      //   STRWui %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));

  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);

  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // SEXTLOAD.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register. Insert this definition right after the generated LDP,
    // i.e., before InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}


MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load, if the destination register of the load is the same
    // register as the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }

    // Replace the load with a mov if the load and store are in the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
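    // UBFM Rd, Rn, #Immr, #Imms extracts bits [Imms:Immr] of Rn into the low
    // bits of Rd. Worked example (illustrative): a 2-byte load at byte offset
    // 2 into a 4-byte store gives Immr = 8 * 2 = 16 and
    // Imms = 16 + 16 - 1 = 31, i.e. an LSR #16 of the stored register.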

    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0)               // imms
          ;

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else if (IsStoreXReg && Imms == 31) {
      // Use the 32 bit variant of UBFM if it's the LSR alias of the
      // instruction.
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
                     .addImm(Immr)
                     .addImm(Imms)
                     .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instruction.
  LoadI->eraseFromParent();
  return NextI;
}


static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
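
// Worked example (illustrative): an unscaled STURXi at byte offset 24 with
// OffsetStride 8 scales to 24 / 8 = 3, which fits the signed 7-bit pair
// immediate range [-64, 63]; a byte offset of 520 scales to 65 and is
// rejected.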


// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
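
// Worked example (illustrative): alignTo(10, 8) = (10 + 7) & ~7 = 16, and
// alignTo(16, 8) = 16. PowOf2 must be a power of two for the mask to be
// valid; the callers below only pass memory-scale strides (1, 2, 4, 8, 16).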


static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns) {
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
      LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
      return true;
    }
  }

  LLVM_DEBUG(dbgs() << "No aliases found\n");
  return false;
}


bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also we can't handle stores without an immediate offset operand,
    // while the operand might be the address for a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

static bool needsWinCFI(const MachineFunction *MF) {
  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         MF->getFunction().needsUnwindTableEntry();
}
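
// Sketch of the pattern findMatchingStore recognizes (illustrative):
//
//   str w1, [x0, #4]    ; candidate store found walking backwards
//   ...                 ; nothing modifies x0 or w1, no aliasing store
//   ldrh w2, [x0, #6]   ; the load we started from
//
// The backward walk gives up on calls, on any write to the base register,
// and on any store that may alias the load, bounded by LdStLimit.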


// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
                                  MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if the opcodes are pre ld/st there is nothing more to check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Two pre ld/st of different opcodes cannot be merged either.
  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
    return false;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // Narrow stores do not have a matching pair opcode, so constrain their
  // merging to zero stores.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X,SW>pre - LDR<S,D,Q,W,X,SW>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}


static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // Renaming registers with multiple disjunct sub-registers (e.g. the
    // result of a LD3) means that all sub-registers are renamed, potentially
    // impacting other instructions we did not check. Bail out.
    // Note that this relies on the structure of the AArch64 register file. In
    // particular, a subregister cannot be written without overwriting the
    // whole register.
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }

    // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
    // them must be known. For instance, we can usually rewrite implicit-defs
    // introduced for super-register liveness of ORRWrs/ADDWri results.
    if (MOP.isImplicit() && MOP.isDef()) {
      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
        return false;
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
    }
  }
  return MOP.isImplicit() ||
         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
}


static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI);
    return false;
  }

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI);
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                        << "currently\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //       1. Insert an extra copy, to materialize the def.
      //       2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}


// We want to merge the second load into the first by rewriting the usages of
// the register between the two loads. Check that renaming the register used
// by the first load is possible until the second load is reached.
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  if (FirstLoad.isPseudo())
    return false;

  UsedInBetween.accumulate(FirstLoad);
  auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  bool Success = std::all_of(
      FirstLoad.getIterator(), SecondLoad.getIterator(),
      [&](MachineInstr &MI) {
        LLVM_DEBUG(dbgs() << "Checking " << MI);
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                            << "currently\n");
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
        }

        return true;
      });
  return Success;
}


// Check if we can find a physical register for renaming Reg. This register
// must:
// * not be defined already in DefinedInBB; DefinedInBB must contain all
//   defined registers first.
// * not be used between the load/store pair; UsedInBetween must contain all
//   accessed registers between the first and the second load/store pair.
// * be usable in all required register classes (checked using
//   RequiredClasses).
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}


// For store pairs: look for a register to rename from FirstMI up to the
// beginning of the block. For load pairs: look between FirstMI and MI.
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
    Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return RenameReg;

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return RenameReg;

  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
                                          RequiredClasses, TRI);
  }
  return RenameReg;
}


/// Scan the instructions looking for a load/store that can be combined with
/// the current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read or write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are scaled.
        // If FirstMI is scaled then scale the offset of MI accordingly.
        // Otherwise, do the opposite (i.e., make MI's offset unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is not equal to the size of the
        // destination register it can't be paired with a pre-index ld/st
        // pair. Additionally, if the base reg is used or modified, the
        // operations can't be paired: bail and keep looking.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction is
          // the same, it needs to be using the updated register and therefore
          // it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset of the scaled narrow input,
          // bail and keep looking. For promotable zero stores, allow only when
          // the stored value is the same (i.e., WZR).
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. Single
          // insns have a 12-bit unsigned offset field. If the resultant
          // immediate offset of merging these instructions is out of range for
          // a pairwise instruction, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs()
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }

        // If the BaseReg has been modified, then we cannot do the optimization.
        // For example, in the following pattern
        //   ldr x1 [x2]
        //   ldr x2 [x3]
        //   ldr x4 [x2, #8],
        // the first and third ldr cannot be converted to ldp x1, x4, [x2]
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
                                                Reg, getLdStRegOp(MI).getReg());

        // If the Rt of the second instruction (destination register of the
        // load) was not modified or used between the two instructions and none
        // of the instructions between the second and first alias with the
        // second, we can combine the second into the first.
        bool RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           !UsedRegUnits.available(getLdStRegOp(MI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not used: "
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // For pairs loading into the same register, try to find a renaming
          // opportunity to allow the merge.
          if (SameLoadReg) {
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB, UsedInBetween,
                                                RequiredClasses, TRI);
            if (!RenameReg) {
              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                                UsedRegUnits, TRI);
              MemInsns.push_back(&MI);
              LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
                                << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          }

          Flags.setMergeForward(false);
          if (!SameLoadReg)
            Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between the
        // first and the second alias with the first, we can combine the first
        // into the second.
        RtNotModified = !(
            MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          << "Reg '" << getLdStRegOp(FirstMI)
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI.isCall()) {
      LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
      return E;
    }

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg)) {
      LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
      return E;
    }

    // Update list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}


static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  const auto &Info = MF.getFrameInstructions()[CFIIndex];
  switch (Info.getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}


std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator InsertPt = I;

  // If updating the SP and the following instruction is CFA offset related
  // CFI, make sure the CFI follows the SP update either by merging at the
  // location of the update or by moving the CFI after the merged instruction.
  if (IsForward) {
    assert(IsPreIdx);
    if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
      if (MergeEither) {
        InsertPt = Update;
      } else {
        // Take care not to reorder CFIs.
        if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
              return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
            }))
          return std::nullopt;

        MachineBasicBlock *MBB = InsertPt->getParent();
        MBB->splice(std::next(InsertPt), MBB, CFI);
      }
    }
  }

  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }

  if (IsPreIdx) {
    ++NumPreFolded;
    LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
  } else {
    ++NumPostFolded;
    LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
  }
  LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Update->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}


MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  unsigned Low = Offset & Mask;
  unsigned High = Offset - Low;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
  MachineInstrBuilder AddMIB, MemMIB;

  // Add IndexReg, BaseReg, High (the BaseReg may be SP)
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          .addDef(IndexReg)
          .addUse(BaseReg)
          .addImm(High >> 12)  // shifted value
          .addImm(12);         // shift 12
  (void)AddMIB;
  // Ld/St DestReg, IndexReg, Imm12
  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(MemMI))
               .addUse(IndexReg)
               .addImm(Low / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
  (void)MemMIB;

  ++NumConstOffsetFolded;
  LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
  LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
  LLVM_DEBUG(PrevI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Update->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
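
// Sketch of the rewrite (illustrative, operand syntax approximate): for
// Offset = a * (1 << 12) * Scale + low,
//
//   movz w8, #...          ; low 16 bits of Offset
//   movk w8, #..., lsl #16 ; high bits of Offset
//   ldr  x1, [x0, x8]
//   ; becomes
//   add  x8, x0, #a, lsl #12
//   ldr  x1, [x8, #low/Scale]
//
// High keeps the bits above the scaled 12-bit field and Low the remainder,
// so Low / Scale fits the unsigned-offset form returned by
// getBaseAddressOpcode.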


bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // Scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, we check that it matches the amount
    // we're adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  return false;
}
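
// Worked example (illustrative): for a candidate pair
//
//   ldr x0, [x2]
//   add x2, x2, #8
//
// the ADDXri matches: both its source and destination are the base register
// x2, UpdateOffset = 8 is a multiple of Scale = 1 for the post-indexed form,
// and the scaled offset 8 lies within [-256, 255], so the two can be merged
// into
//   ldr x0, [x2], #8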


bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // The update instruction source and destination register must be the
  // same as the load/store index register.
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

    // movz + movk hold a large offset of a Ld/St instruction.
    MachineBasicBlock::iterator B = MI.getParent()->begin();
    MachineBasicBlock::iterator MBBI = &MI;
    // Skip the case when the MI is the first instruction of a block.
    if (MBBI == B)
      return false;
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MovzMI = *MBBI;
    // Make sure the MOVKWi and MOVZWi set the same register.
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
      unsigned Low = MovzMI.getOperand(1).getImm();
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      Offset = High + Low;
      // 12-bit optionally shifted immediates are legal for adds.
      return Offset >> 24 == 0;
    }
  }
  return false;
}


MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions which
  // ignore the address part of the source register.
  // This does not apply to STGPi as well, which does not have unpredictable
  // behavior in this case unlike normal stores, and always performs writeback
  // after reading the source register value.
  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early. If we are optimizing SP, do not allow instructions
    // that may load or store in between the load and the optimized value
    // update.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
      return E;
  }
  return E;
}


MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
  Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
                        IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
                                     : AArch64::NoRegister};

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't
  // merge the update.
  if (!isTagStore(MemMI)) {
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
        return E;
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAcessBeforeSPPreInc = false;
  MergeEither = true;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may be
      // zero).
      if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;

    // If we have a destination register (i.e. a load instruction) and a
    // destination register is used or modified, then we can only merge forward,
    // i.e. the combined instruction is put in the place of the update
    // instruction.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
           UsedRegUnits.available(DestReg[0]))) ||
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
           UsedRegUnits.available(DestReg[1]))))
      MergeEither = false;

    // Keep track if we have a memory access before an SP pre-increment, in
    // this case we need to validate later that the update amount respects the
    // red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAcessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}


MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  // If the load is the first instruction in the block, there's obviously
  // not any matching load or store.
  if (MBBI == B)
    return E;

  // Make sure the IndexReg is killed and the shift amount is zero.
  // TODO: Relax this restriction to extend, simplify processing now.
  if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
      !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
      (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
    return E;

  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the index register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        !UsedRegUnits.available(IndexReg))
      return E;

  } while (MBBI != B && Count < Limit);
  return E;
}


bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
    return false;

  // Make sure this is a reg+imm.
  // FIXME: It is possible to extend it to handle reg+reg cases.
  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}


// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}


// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1)
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
  if (Paired != E) {
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
    // that the alignment of the source pointer is at least double the
    // alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, cancel the transformation.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      // Get the needed alignments to check them if the ldp-aligned-only/
      // stp-aligned-only features are opted.
      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new position
    // MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}


bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
    (MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  //   merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/false,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  //   merged into:
  //   ldr x1, [x0, #8]!
  bool MergeEither;
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
                                     /*IsPreIdx=*/true, MergeEither)) {
      MBBI = *NextI;
      return true;
    }
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for,
  // however, is not, so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  //   merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/true,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  return false;
}


bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
                                              int Scale) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a const offset for the index load/store
  // instruction. For example,
  //   mov x8, #LargeImm   ; = a * (1 << 12) + imm12
  //   ldr x1, [x0, x8]
  //   merged into:
  //   add x8, x0, a * (1 << 12)
  //   ldr x1, [x8, imm12]
  unsigned Offset;
  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}


bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();

  bool Modified = false;
  // Five transformations to do here:
  // 1) Find loads that directly read from stores and promote them by
  //    replacing with mov instructions. If the store is wider than the load,
  //    the load will be replaced with a bitfield extract.
  //      e.g.,
  //        str w1, [x0, #4]
  //        ldrh w2, [x0, #6]
  //        ; becomes
  //        str w1, [x0, #4]
  //        lsr w2, w1, #16
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 2) Merge adjacent zero stores into a wider store.
  //      e.g.,
  //        strh wzr, [x0]
  //        strh wzr, [x0, #2]
  //        ; becomes
  //        str wzr, [x0]
  //      e.g.,
  //        str wzr, [x0]
  //        str wzr, [x0, #4]
  //        ; becomes
  //        str xzr, [x0]
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }
  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]

  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 4) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 5) Find a register assigned with a const value and merge the const offset
  //    of the index address for a register-offset load or store.
  //      e.g.,
  //        mov x8, #LargeImm
  //        ldr x1, [x0, x8]
  //        ; becomes
  //        add x8, x0, #LargeImm & ~0xfff
  //        ldr x1, [x8, #LargeImm & 0xfff]
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    int Scale;
    if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
      Modified = true;
    else
      ++MBBI;
  }

  return Modified;
}


bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Resize the modified and used register unit trackers. We do this once
  // per function and then clear the register units each time we optimize a
  // load or store.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
    Modified |= M;
  }

  return Modified;
}
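
// Usage note (illustrative, not part of the original source): the pass is
// created by createAArch64LoadStoreOptimizer() below and scheduled by the
// AArch64 target pass configuration; it runs after register allocation
// (NoVRegs) and relies on liveness-accurate kill flags when renaming.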


// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads
// and stores near one another? Note: The pre-RA instruction scheduler already
// has hooks to try and schedule pairable loads/stores together to improve
// pairing opportunities.

// FIXME: When pairing store instructions it's very possible for this pass to
// hoist a store with a KILL marker above another use (without a KILL marker).
// The resulting IR is invalid, but nothing uses the KILL markers after this
// pass, so it's never caused a problem in practice.

/// createAArch64LoadStoreOptimizer - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizer() {
  return new AArch64LoadStoreOpt();
}

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)

static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)

static bool isPromotableLoadFromStore(MachineInstr &MI)

static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)

static unsigned getMatchingPairOpcode(unsigned Opc)

static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)

static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

static bool needsWinCFI(const MachineFunction *MF)

static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)

static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)

static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)

static unsigned getPreIndexedOpcode(unsigned Opc)

#define AARCH64_LOAD_STORE_OPT_NAME

static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)

static bool isPromotableZeroStoreInst(MachineInstr &MI)

static unsigned getMatchingWideOpcode(unsigned Opc)

static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)

static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)

static int alignTo(int Num, int PowOf2)

static bool isTagStore(const MachineInstr &MI)

static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)

static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)

static bool isRewritableImplicitDef(unsigned Opc)

static unsigned getPostIndexedOpcode(unsigned Opc)

static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)

static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)

static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)

static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

static unsigned getBaseAddressOpcode(unsigned Opc)

SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

MachineBasicBlock MachineBasicBlock::iterator MBBI

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file provides an implementation of debug counters.

#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)

const HexagonInstrInfo * TII

unsigned const TargetRegisterInfo * TRI

static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)

static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...

static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)

Returns the immediate offset operator of a load/store.

static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)

Returns the shift amount operator of a load/store.

static bool isPreLdSt(const MachineInstr &MI)

Returns whether the instruction is a pre-indexed load/store.

static bool isPairedLdSt(const MachineInstr &MI)

Returns whether the instruction is a paired load/store.

static int getMemScale(unsigned Opc)

Scaling factor for (scaled or unscaled) load or store.

static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)

Returns the base register operator of a load/store.

const AArch64RegisterInfo * getRegisterInfo() const override

const AArch64InstrInfo * getInstrInfo() const override

const AArch64TargetLowering * getTargetLowering() const override

unsigned getRedZoneSize(const Function &F) const

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

static bool shouldExecute(unsigned CounterName)

FunctionPass class - This class is used to implement most global optimizations.

bool needsUnwindTableEntry() const

True if this function needs an unwind table.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

A set of register units used to track register liveness.

static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)

For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...

bool available(MCPhysReg Reg) const

Returns true if no part of physical register Reg is live.

void init(const TargetRegisterInfo &TRI)

Initialize and clear the set.

void addReg(MCPhysReg Reg)

Adds register units covered by physical register Reg.

void removeReg(MCPhysReg Reg)

Removes all register units covered by physical register Reg.

void accumulate(const MachineInstr &MI)

Adds all register units used, defined or clobbered in MI.
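
A sketch of the accumulate-then-query pattern these methods support (function and variable names are illustrative): record everything modified or used over a range of instructions, then ask whether a candidate register emerged untouched.

static bool regUntouchedInRange(MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End,
                                MCPhysReg Candidate,
                                const TargetRegisterInfo *TRI) {
  LiveRegUnits ModifiedRegUnits(*TRI), UsedRegUnits(*TRI);
  for (auto It = Begin; It != End; ++It)
    LiveRegUnits::accumulateUsedDefed(*It, ModifiedRegUnits, UsedRegUnits,
                                      TRI);
  // Candidate is usable only if no part of it was written or read in between.
  return ModifiedRegUnits.available(Candidate) &&
         UsedRegUnits.available(Candidate);
}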

bool usesWindowsCFI() const

OpType getOperation() const

Wrapper class representing physical registers. Should be passed by value.

reverse_instr_iterator instr_rend()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
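
A minimal sketch of splice in the way an optimizer typically uses it: relocate one instruction next to another within the same block, preserving the instruction object itself (no rebuild; operands and memory operands stay intact).

// Move the instruction at 'Update' so it sits immediately before 'MemI'.
static void moveBefore(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MemI,
                       MachineBasicBlock::iterator Update) {
  MBB.splice(/*Where=*/MemI, /*Other=*/&MBB, /*From=*/Update);
}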

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.

virtual MachineFunctionProperties getRequiredProperties() const

Properties which a MachineFunction may have at a given point in time.

MachineFunctionProperties & set(Property P)
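
These pieces combine into the standard legacy-pass-manager skeleton for a machine pass; a sketch under the usual conventions (pass name and requested analyses are illustrative):

struct ExampleMIRPass : public MachineFunctionPass {
  static char ID;
  ExampleMIRPass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();    // e.g. request alias analysis
    MachineFunctionPass::getAnalysisUsage(AU); // superclass call is required
  }

  MachineFunctionProperties getRequiredProperties() const override {
    // Run only after register allocation: no virtual registers remain.
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    return false; // return true iff the function was modified
  }
};

char ExampleMIRPass::ID = 0;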

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

const std::vector< MCCFIInstruction > & getFrameInstructions() const

Returns a reference to a list of cfi instructions in the function's prologue.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do so.

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with.

const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const

const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register use operand.

const MachineInstrBuilder & setMIFlags(unsigned Flags) const

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.
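
The builder methods chain into a fluent interface; a hedged sketch of constructing one AArch64 store-pair instruction (opcode, registers, and offset chosen purely for illustration):

static MachineInstr *buildExampleStp(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertPt,
                                     const DebugLoc &DL,
                                     const TargetInstrInfo *TII,
                                     ArrayRef<MachineMemOperand *> MMOs) {
  // stp x0, x1, [sp, #16] -- STPXi scales the immediate by 8, hence addImm(2).
  return BuildMI(MBB, InsertPt, DL, TII->get(AArch64::STPXi))
      .addReg(AArch64::X0)
      .addReg(AArch64::X1)
      .addReg(AArch64::SP)
      .addImm(2)
      .setMemRefs(MMOs);
}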

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const

Returns true if this instruction's memory access aliases the memory access of Other.

bool mayLoad(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read memory.

iterator_range< mop_iterator > operands()

bool hasOrderedMemoryRef() const

Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.

const MachineFunction * getMF() const

Return the function that contains the basic block that this instruction belongs to.

bool mayStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly modify memory.
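
A typical legality guard built from these predicates (illustrative, not this pass's full candidate filter):

static bool isPlainMemOp(const MachineInstr &MI) {
  if (!MI.mayLoad() && !MI.mayStore())
    return false; // not a memory access at all
  // Volatile/atomic accesses, or accesses whose memory behavior cannot be
  // analyzed, must not be reordered or merged.
  if (MI.hasOrderedMemoryRef())
    return false;
  return true;
}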

bool isPseudo(QueryType Type=IgnoreBundle) const

Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

void setImplicit(bool Val=true)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setReg(Register Reg)

Change the register this operand corresponds to.

void setIsKill(bool Val=true)

bool isRenamable() const

isRenamable - Returns true if this register may be renamed, i.e. it does not generate a value that is somehow read in a way that is not represented by the Machine IR.

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

bool isEarlyClobber() const

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.

bool tracksLiveness() const

tracksLiveness - Returns true when tracking register liveness accurately.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

Wrapper class representing virtual and physical registers.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

const MCAsmInfo * getMCAsmInfo() const

Return target specific asm information.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.

self_iterator getIterator()

A range adaptor for a pair of iterators.

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

static unsigned getShiftValue(unsigned Imm)

getShiftValue - Extract the shift value.

static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)

getShifterImm - Encode the shift type and amount. imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
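
A round-trip sketch of the shifter-operand helpers (the AArch64_AM namespace is the backend's convention for addressing-mode encodings): encode an LSL #3, then decode the amount back.

unsigned ShifterImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 3);
unsigned Amount = AArch64_AM::getShiftValue(ShifterImm); // Amount == 3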

@ Define

Register definition.

initializer< Ty > init(const Ty &Val)

IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)

Increment It, then continue incrementing it while it points to a debug instruction.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)

Returns an iterator range over all physical register and mask operands for MI and bundled instructions.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
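
any_of composes naturally with phys_regs_and_masks above; a sketch answering "does MI read or write any part of Reg?" (the helper name is hypothetical):

static bool touchesReg(const MachineInstr &MI, MCRegister Reg,
                       const TargetRegisterInfo *TRI) {
  return any_of(phys_regs_and_masks(MI), [&](const MachineOperand &MO) {
    // Skip register-mask operands; regsOverlap also catches sub/super regs.
    return MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg);
  });
}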

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionPass * createAArch64LoadStoreOptimizationPass()

createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.

auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)

Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
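
A sketch of the debug-invariance idiom this enables: iterate only real instructions, so that compiling with -g cannot change the pass's decisions.

for (MachineInstr &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
  // ... examine MI; DBG_VALUE and other debug pseudos never appear here ...
}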

void initializeAArch64LoadStoreOptPass(PassRegistry &)

IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)

Decrement It, then continue decrementing it while it points to a debug instruction.

Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.

This struct is a compact representation of a valid (non-zero power of two) alignment.

uint64_t value() const

This is a hole in the type system and should not be abused.