LLVM: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Source File

1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//

45#include <cassert>

46#include <cstdint>

47#include <functional>

48#include <iterator>

49#include <limits>

50#include <optional>

51

52using namespace llvm;

53

54#define DEBUG_TYPE "aarch64-ldst-opt"

55

56STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");

57STATISTIC(NumPostFolded, "Number of post-index updates folded");

58STATISTIC(NumPreFolded, "Number of pre-index updates folded");

60 "Number of load/store from unscaled generated");

61STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");

62STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

63STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "

64 "not passed the alignment check");

66 "Number of const offset of index address folded");

67

69 "Controls which pairs are considered for renaming");

70

71

73// The LdStLimit limits how far we search for load/store pairs.
74static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
75 cl::init(20), cl::Hidden);
76
77// The UpdateLimit limits how far we search for update instructions when we
78// form update pre-/post-index instructions.
79static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit",
80 cl::init(100), cl::Hidden);
81
82// The LdStConstLimit limits how far we search for const offset instructions
83// when we form index address load/store instructions.
84static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
85 cl::init(10), cl::Hidden);
86
87// Enable register renaming to find additional store pairing opportunities.
88static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden);

89#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

90

91namespace {

92

93using LdStPairFlags = struct LdStPairFlags {

94

95

96

97 bool MergeForward = false;

98

99

100

101

102

103 int SExtIdx = -1;

104

105

106

107

108 std::optional<MCPhysReg> RenameReg;

109

110 LdStPairFlags() = default;

111

112 void setMergeForward(bool V = true) { MergeForward = V; }

113 bool getMergeForward() const { return MergeForward; }

114

115 void setSExtIdx(int V) { SExtIdx = V; }

116 int getSExtIdx() const { return SExtIdx; }

117

118 void setRenameReg(MCPhysReg R) { RenameReg = R; }

119 void clearRenameReg() { RenameReg = std::nullopt; }

120 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }

121};
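// How these flags are typically used (sketch, inferred from the code below):
// findMatchingInsn() fills in an LdStPairFlags for a candidate pair, e.g.
// setMergeForward(true) when the first instruction is merged forward into
// the second, and setSExtIdx() when one of the two loads is sign-extending;
// mergePairedInsns() then reads them back via getMergeForward(),
// getSExtIdx() and getRenameReg() to drive the actual rewrite.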

122

123struct AArch64LoadStoreOpt : public MachineFunctionPass {
124 static char ID;

125

127

132

133

136

137 void getAnalysisUsage(AnalysisUsage &AU) const override {

138 AU.addRequired<AAResultsWrapperPass>();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }

141

142

143

144

146 LdStPairFlags &Flags,

147 unsigned Limit,

148 bool FindNarrowMerge);

149

150

151

154

155

159 const LdStPairFlags &Flags);

160

161

165 const LdStPairFlags &Flags);

166

167

171

172

173

174

177 int UnscaledOffset, unsigned Limit);

178

179

180

181

185

186

187

188

189

190

191

194 bool &MergeEither);

195

196

197

199 unsigned BaseReg, int Offset);

200

202 unsigned IndexReg, unsigned &Offset);

203

204

205 std::optional<MachineBasicBlock::iterator>

208 bool IsPreIdx, bool MergeEither);

209

213 int Scale);

214

215

217

218

220

221

223

224

226

227

229

231

233

236 }

237

239};

240

241char AArch64LoadStoreOpt::ID = 0;

242

243}

244

247

248static bool isNarrowStore(unsigned Opc) {

249 switch (Opc) {

250 default:

251 return false;

252 case AArch64::STRBBui:

253 case AArch64::STURBBi:

254 case AArch64::STRHHui:

255 case AArch64::STURHHi:

256 return true;

257 }

258}
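// For instance, two adjacent narrow zero stores such as
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// can later be rewritten as the single wider "str wzr, [x0]"; see
// getMatchingWideOpcode() and mergeNarrowZeroStores() below.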

259

260
261
262static bool isTagStore(const MachineInstr &MI) {
263 switch (MI.getOpcode()) {

264 default:

265 return false;

266 case AArch64::STGi:

267 case AArch64::STZGi:

268 case AArch64::ST2Gi:

269 case AArch64::STZ2Gi:

270 return true;

271 }

272}

273

274static unsigned getMatchingNonSExtOpcode(unsigned Opc,
275 bool *IsValidLdStrOpc = nullptr) {

276 if (IsValidLdStrOpc)

277 *IsValidLdStrOpc = true;

278 switch (Opc) {

279 default:

280 if (IsValidLdStrOpc)

281 *IsValidLdStrOpc = false;

282 return std::numeric_limits<unsigned>::max();

283 case AArch64::STRDui:

284 case AArch64::STURDi:

285 case AArch64::STRDpre:

286 case AArch64::STRQui:

287 case AArch64::STURQi:

288 case AArch64::STRQpre:

289 case AArch64::STRBBui:

290 case AArch64::STURBBi:

291 case AArch64::STRHHui:

292 case AArch64::STURHHi:

293 case AArch64::STRWui:

294 case AArch64::STRWpre:

295 case AArch64::STURWi:

296 case AArch64::STRXui:

297 case AArch64::STRXpre:

298 case AArch64::STURXi:

299 case AArch64::STR_ZXI:

300 case AArch64::LDRDui:

301 case AArch64::LDURDi:

302 case AArch64::LDRDpre:

303 case AArch64::LDRQui:

304 case AArch64::LDURQi:

305 case AArch64::LDRQpre:

306 case AArch64::LDRWui:

307 case AArch64::LDURWi:

308 case AArch64::LDRWpre:

309 case AArch64::LDRXui:

310 case AArch64::LDURXi:

311 case AArch64::LDRXpre:

312 case AArch64::STRSui:

313 case AArch64::STURSi:

314 case AArch64::STRSpre:

315 case AArch64::LDRSui:

316 case AArch64::LDURSi:

317 case AArch64::LDRSpre:

318 case AArch64::LDR_ZXI:

319 return Opc;

320 case AArch64::LDRSWui:

321 return AArch64::LDRWui;

322 case AArch64::LDURSWi:

323 return AArch64::LDURWi;

324 case AArch64::LDRSWpre:

325 return AArch64::LDRWpre;

326 }

327}

328

329static unsigned getMatchingWideOpcode(unsigned Opc) {
330 switch (Opc) {
331 default:
332 llvm_unreachable("Opcode has no wide equivalent!");
333 case AArch64::STRBBui:

334 return AArch64::STRHHui;

335 case AArch64::STRHHui:

336 return AArch64::STRWui;

337 case AArch64::STURBBi:

338 return AArch64::STURHHi;

339 case AArch64::STURHHi:

340 return AArch64::STURWi;

341 case AArch64::STURWi:

342 return AArch64::STURXi;

343 case AArch64::STRWui:

344 return AArch64::STRXui;

345 }

346}

347

348static unsigned getMatchingPairOpcode(unsigned Opc) {
349 switch (Opc) {
350 default:
351 llvm_unreachable("Opcode has no pairwise equivalent!");
352 case AArch64::STRSui:

353 case AArch64::STURSi:

354 return AArch64::STPSi;

355 case AArch64::STRSpre:

356 return AArch64::STPSpre;

357 case AArch64::STRDui:

358 case AArch64::STURDi:

359 return AArch64::STPDi;

360 case AArch64::STRDpre:

361 return AArch64::STPDpre;

362 case AArch64::STRQui:

363 case AArch64::STURQi:

364 case AArch64::STR_ZXI:

365 return AArch64::STPQi;

366 case AArch64::STRQpre:

367 return AArch64::STPQpre;

368 case AArch64::STRWui:

369 case AArch64::STURWi:

370 return AArch64::STPWi;

371 case AArch64::STRWpre:

372 return AArch64::STPWpre;

373 case AArch64::STRXui:

374 case AArch64::STURXi:

375 return AArch64::STPXi;

376 case AArch64::STRXpre:

377 return AArch64::STPXpre;

378 case AArch64::LDRSui:

379 case AArch64::LDURSi:

380 return AArch64::LDPSi;

381 case AArch64::LDRSpre:

382 return AArch64::LDPSpre;

383 case AArch64::LDRDui:

384 case AArch64::LDURDi:

385 return AArch64::LDPDi;

386 case AArch64::LDRDpre:

387 return AArch64::LDPDpre;

388 case AArch64::LDRQui:

389 case AArch64::LDURQi:

390 case AArch64::LDR_ZXI:

391 return AArch64::LDPQi;

392 case AArch64::LDRQpre:

393 return AArch64::LDPQpre;

394 case AArch64::LDRWui:

395 case AArch64::LDURWi:

396 return AArch64::LDPWi;

397 case AArch64::LDRWpre:

398 return AArch64::LDPWpre;

399 case AArch64::LDRXui:

400 case AArch64::LDURXi:

401 return AArch64::LDPXi;

402 case AArch64::LDRXpre:

403 return AArch64::LDPXpre;

404 case AArch64::LDRSWui:

405 case AArch64::LDURSWi:

406 return AArch64::LDPSWi;

407 case AArch64::LDRSWpre:

408 return AArch64::LDPSWpre;

409 }

410}
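// Example of the mapping above: getMatchingPairOpcode(AArch64::LDRXui) is
// AArch64::LDPXi, which is what allows
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// to be combined into
//   ldp x0, x1, [x2]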

411

412static unsigned isMatchingStore(MachineInstr &LoadInst,
413 MachineInstr &StoreInst) {
414 unsigned LdOpc = LoadInst.getOpcode();
415 unsigned StOpc = StoreInst.getOpcode();
416 switch (LdOpc) {
417 default:
418 llvm_unreachable("Unsupported load instruction!");
419 case AArch64::LDRBBui:

420 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||

421 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;

422 case AArch64::LDURBBi:

423 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||

424 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;

425 case AArch64::LDRHHui:

426 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||

427 StOpc == AArch64::STRXui;

428 case AArch64::LDURHHi:

429 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||

430 StOpc == AArch64::STURXi;

431 case AArch64::LDRWui:

432 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;

433 case AArch64::LDURWi:

434 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;

435 case AArch64::LDRXui:

436 return StOpc == AArch64::STRXui;

437 case AArch64::LDURXi:

438 return StOpc == AArch64::STURXi;

439 }

440}
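// A load "matches" a store when the store is at least as wide and uses the
// same (scaled or unscaled) addressing form, e.g. LDRHHui can be satisfied
// from STRHHui, STRWui or STRXui. The forwarded value is then carved out of
// the stored register in promoteLoadFromStore() below.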

441

442static unsigned getPreIndexedOpcode(unsigned Opc) {
443 // FIXME: We don't currently support creating pre-indexed loads/stores when
444 // the load or store is the unscaled version. If we decide to perform such
445 // an optimization in the future the cost should be checked against the
446 // base register.
447 switch (Opc) {
448 default:
449 llvm_unreachable("Opcode has no pre-indexed wise equivalent!");

450 case AArch64::STRSui:

451 return AArch64::STRSpre;

452 case AArch64::STRDui:

453 return AArch64::STRDpre;

454 case AArch64::STRQui:

455 return AArch64::STRQpre;

456 case AArch64::STRBBui:

457 return AArch64::STRBBpre;

458 case AArch64::STRHHui:

459 return AArch64::STRHHpre;

460 case AArch64::STRWui:

461 return AArch64::STRWpre;

462 case AArch64::STRXui:

463 return AArch64::STRXpre;

464 case AArch64::LDRSui:

465 return AArch64::LDRSpre;

466 case AArch64::LDRDui:

467 return AArch64::LDRDpre;

468 case AArch64::LDRQui:

469 return AArch64::LDRQpre;

470 case AArch64::LDRBBui:

471 return AArch64::LDRBBpre;

472 case AArch64::LDRHHui:

473 return AArch64::LDRHHpre;

474 case AArch64::LDRWui:

475 return AArch64::LDRWpre;

476 case AArch64::LDRXui:

477 return AArch64::LDRXpre;

478 case AArch64::LDRSWui:

479 return AArch64::LDRSWpre;

480 case AArch64::LDPSi:

481 return AArch64::LDPSpre;

482 case AArch64::LDPSWi:

483 return AArch64::LDPSWpre;

484 case AArch64::LDPDi:

485 return AArch64::LDPDpre;

486 case AArch64::LDPQi:

487 return AArch64::LDPQpre;

488 case AArch64::LDPWi:

489 return AArch64::LDPWpre;

490 case AArch64::LDPXi:

491 return AArch64::LDPXpre;

492 case AArch64::STPSi:

493 return AArch64::STPSpre;

494 case AArch64::STPDi:

495 return AArch64::STPDpre;

496 case AArch64::STPQi:

497 return AArch64::STPQpre;

498 case AArch64::STPWi:

499 return AArch64::STPWpre;

500 case AArch64::STPXi:

501 return AArch64::STPXpre;

502 case AArch64::STGi:

503 return AArch64::STGPreIndex;

504 case AArch64::STZGi:

505 return AArch64::STZGPreIndex;

506 case AArch64::ST2Gi:

507 return AArch64::ST2GPreIndex;

508 case AArch64::STZ2Gi:

509 return AArch64::STZ2GPreIndex;

510 case AArch64::STGPi:

511 return AArch64::STGPpre;

512 }

513}

514

515static unsigned getBaseAddressOpcode(unsigned Opc) {
516
517 switch (Opc) {
518 default:
519 llvm_unreachable("Opcode has no base address equivalent!");

520 case AArch64::LDRBroX:

521 return AArch64::LDRBui;

522 case AArch64::LDRBBroX:

523 return AArch64::LDRBBui;

524 case AArch64::LDRSBXroX:

525 return AArch64::LDRSBXui;

526 case AArch64::LDRSBWroX:

527 return AArch64::LDRSBWui;

528 case AArch64::LDRHroX:

529 return AArch64::LDRHui;

530 case AArch64::LDRHHroX:

531 return AArch64::LDRHHui;

532 case AArch64::LDRSHXroX:

533 return AArch64::LDRSHXui;

534 case AArch64::LDRSHWroX:

535 return AArch64::LDRSHWui;

536 case AArch64::LDRWroX:

537 return AArch64::LDRWui;

538 case AArch64::LDRSroX:

539 return AArch64::LDRSui;

540 case AArch64::LDRSWroX:

541 return AArch64::LDRSWui;

542 case AArch64::LDRDroX:

543 return AArch64::LDRDui;

544 case AArch64::LDRXroX:

545 return AArch64::LDRXui;

546 case AArch64::LDRQroX:

547 return AArch64::LDRQui;

548 }

549}

550

551static unsigned getPostIndexedOpcode(unsigned Opc) {
552 switch (Opc) {

553 default:

554 llvm_unreachable("Opcode has no post-indexed wise equivalent!");

555 case AArch64::STRSui:

556 case AArch64::STURSi:

557 return AArch64::STRSpost;

558 case AArch64::STRDui:

559 case AArch64::STURDi:

560 return AArch64::STRDpost;

561 case AArch64::STRQui:

562 case AArch64::STURQi:

563 return AArch64::STRQpost;

564 case AArch64::STRBBui:

565 return AArch64::STRBBpost;

566 case AArch64::STRHHui:

567 return AArch64::STRHHpost;

568 case AArch64::STRWui:

569 case AArch64::STURWi:

570 return AArch64::STRWpost;

571 case AArch64::STRXui:

572 case AArch64::STURXi:

573 return AArch64::STRXpost;

574 case AArch64::LDRSui:

575 case AArch64::LDURSi:

576 return AArch64::LDRSpost;

577 case AArch64::LDRDui:

578 case AArch64::LDURDi:

579 return AArch64::LDRDpost;

580 case AArch64::LDRQui:

581 case AArch64::LDURQi:

582 return AArch64::LDRQpost;

583 case AArch64::LDRBBui:

584 return AArch64::LDRBBpost;

585 case AArch64::LDRHHui:

586 return AArch64::LDRHHpost;

587 case AArch64::LDRWui:

588 case AArch64::LDURWi:

589 return AArch64::LDRWpost;

590 case AArch64::LDRXui:

591 case AArch64::LDURXi:

592 return AArch64::LDRXpost;

593 case AArch64::LDRSWui:

594 return AArch64::LDRSWpost;

595 case AArch64::LDPSi:

596 return AArch64::LDPSpost;

597 case AArch64::LDPSWi:

598 return AArch64::LDPSWpost;

599 case AArch64::LDPDi:

600 return AArch64::LDPDpost;

601 case AArch64::LDPQi:

602 return AArch64::LDPQpost;

603 case AArch64::LDPWi:

604 return AArch64::LDPWpost;

605 case AArch64::LDPXi:

606 return AArch64::LDPXpost;

607 case AArch64::STPSi:

608 return AArch64::STPSpost;

609 case AArch64::STPDi:

610 return AArch64::STPDpost;

611 case AArch64::STPQi:

612 return AArch64::STPQpost;

613 case AArch64::STPWi:

614 return AArch64::STPWpost;

615 case AArch64::STPXi:

616 return AArch64::STPXpost;

617 case AArch64::STGi:

618 return AArch64::STGPostIndex;

619 case AArch64::STZGi:

620 return AArch64::STZGPostIndex;

621 case AArch64::ST2Gi:

622 return AArch64::ST2GPostIndex;

623 case AArch64::STZ2Gi:

624 return AArch64::STZ2GPostIndex;

625 case AArch64::STGPi:

626 return AArch64::STGPpost;

627 }

628}

629

630static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
631
632 unsigned OpcA = FirstMI.getOpcode();

633 unsigned OpcB = MI.getOpcode();

634

635 switch (OpcA) {

636 default:

637 return false;

638 case AArch64::STRSpre:

639 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);

640 case AArch64::STRDpre:

641 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);

642 case AArch64::STRQpre:

643 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);

644 case AArch64::STRWpre:

645 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);

646 case AArch64::STRXpre:

647 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);

648 case AArch64::LDRSpre:

649 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);

650 case AArch64::LDRDpre:

651 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);

652 case AArch64::LDRQpre:

653 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);

654 case AArch64::LDRWpre:

655 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);

656 case AArch64::LDRXpre:

657 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);

658 case AArch64::LDRSWpre:

659 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);

660 }

661}

662

663

664static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
665 int &MinOffset, int &MaxOffset) {

668

669

670

672

673 if (IsPaired) {

674 MinOffset = -64;

675 MaxOffset = 63;

676 } else {

677 MinOffset = -256;

678 MaxOffset = 255;

679 }

680}

681

682static MachineOperand &getLdStRegOp(MachineInstr &MI,
683 unsigned PairedRegOp = 0) {

684 assert(PairedRegOp < 2 && "Unexpected register operand idx.");

685 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
686 if (IsPreLdSt)

687 PairedRegOp += 1;

688 unsigned Idx =
689 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp + 1 : PairedRegOp;
690 return MI.getOperand(Idx);

691}

692

693static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
694 MachineInstr &StoreInst,
695 const AArch64InstrInfo *TII) {
696 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
697 int LoadSize = TII->getMemScale(LoadInst);
698 int StoreSize = TII->getMemScale(StoreInst);
699 int UnscaledStOffset =
700 TII->hasUnscaledLdStOffset(StoreInst)
701 ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
702 : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
703 int UnscaledLdOffset =
704 TII->hasUnscaledLdStOffset(LoadInst)
705 ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
706 : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;

707 return (UnscaledStOffset <= UnscaledLdOffset) &&

708 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));

709}
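// Worked example: for "str x1, [x0, #8]" (StoreSize = 8, byte offset 8)
// followed by "ldr w2, [x0, #12]" (LoadSize = 4, byte offset 12), we get
// 8 <= 12 and 12 + 4 <= 8 + 8, so the loaded bytes lie entirely inside the
// stored bytes and the load can be fed from x1.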

710

711static bool isPromotableZeroStoreInst(MachineInstr &MI) {
712 unsigned Opc = MI.getOpcode();

713 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||

714 isNarrowStore(Opc)) &&
715 getLdStRegOp(MI).getReg() == AArch64::WZR;
716}

717

718static bool isPromotableLoadFromStore(MachineInstr &MI) {
719 switch (MI.getOpcode()) {

720 default:

721 return false;

722

723 case AArch64::LDRBBui:

724 case AArch64::LDRHHui:

725 case AArch64::LDRWui:

726 case AArch64::LDRXui:

727

728 case AArch64::LDURBBi:

729 case AArch64::LDURHHi:

730 case AArch64::LDURWi:

731 case AArch64::LDURXi:

732 return true;

733 }

734}

735

736static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
737 unsigned Opc = MI.getOpcode();

738 switch (Opc) {

739 default:

740 return false;

741

742 case AArch64::STRSui:

743 case AArch64::STRDui:

744 case AArch64::STRQui:

745 case AArch64::STRXui:

746 case AArch64::STRWui:

747 case AArch64::STRHHui:

748 case AArch64::STRBBui:

749 case AArch64::LDRSui:

750 case AArch64::LDRDui:

751 case AArch64::LDRQui:

752 case AArch64::LDRXui:

753 case AArch64::LDRWui:

754 case AArch64::LDRHHui:

755 case AArch64::LDRBBui:

756 case AArch64::STGi:

757 case AArch64::STZGi:

758 case AArch64::ST2Gi:

759 case AArch64::STZ2Gi:

760 case AArch64::STGPi:

761

762 case AArch64::STURSi:

763 case AArch64::STURDi:

764 case AArch64::STURQi:

765 case AArch64::STURWi:

766 case AArch64::STURXi:

767 case AArch64::LDURSi:

768 case AArch64::LDURDi:

769 case AArch64::LDURQi:

770 case AArch64::LDURWi:

771 case AArch64::LDURXi:

772

773 case AArch64::LDPSi:

774 case AArch64::LDPSWi:

775 case AArch64::LDPDi:

776 case AArch64::LDPQi:

777 case AArch64::LDPWi:

778 case AArch64::LDPXi:

779 case AArch64::STPSi:

780 case AArch64::STPDi:

781 case AArch64::STPQi:

782 case AArch64::STPWi:

783 case AArch64::STPXi:

784

786 return false;

787

788

789

790

791

792

795 return false;

796

797 return true;

798 }

799}

800

801

802static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
803 unsigned Opc = MI.getOpcode();

804 switch (Opc) {

805 default:

806 return false;

807

808

809 case AArch64::LDRBroX:

810 case AArch64::LDRBBroX:

811 case AArch64::LDRSBXroX:

812 case AArch64::LDRSBWroX:

813 Scale = 1;

814 return true;

815 case AArch64::LDRHroX:

816 case AArch64::LDRHHroX:

817 case AArch64::LDRSHXroX:

818 case AArch64::LDRSHWroX:

819 Scale = 2;

820 return true;

821 case AArch64::LDRWroX:

822 case AArch64::LDRSroX:

823 case AArch64::LDRSWroX:

824 Scale = 4;

825 return true;

826 case AArch64::LDRDroX:

827 case AArch64::LDRXroX:

828 Scale = 8;

829 return true;

830 case AArch64::LDRQroX:

831 Scale = 16;

832 return true;

833 }

834}

835

836static bool isRewritableImplicitDef(unsigned Opc) {
837 switch (Opc) {

838 default:

839 return false;

840 case AArch64::ORRWrs:

841 case AArch64::ADDWri:

842 return true;

843 }

844}

845

846MachineBasicBlock::iterator
847AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
848 MachineBasicBlock::iterator MergeMI,
849 const LdStPairFlags &Flags) {

851 "Expected promotable zero stores.");

852

855

856

857

858

859 if (NextI == MergeMI)

861

862 unsigned Opc = I->getOpcode();

863 unsigned MergeMIOpc = MergeMI->getOpcode();

864 bool IsScaled = TII->hasUnscaledLdStOffset(Opc);

865 bool IsMergedMIScaled = TII->hasUnscaledLdStOffset(MergeMIOpc);

866 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;

867 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

868

869 bool MergeForward = Flags.getMergeForward();

870

871

873

874

875 const MachineOperand &BaseRegOp =

877 : AArch64InstrInfo::getLdStBaseOp(*I);

878

879

880 int64_t IOffsetInBytes =

882 int64_t MIOffsetInBytes =

884 MergeMIOffsetStride;

885

886 int64_t OffsetImm;

887 if (IOffsetInBytes > MIOffsetInBytes)

888 OffsetImm = MIOffsetInBytes;

889 else

890 OffsetImm = IOffsetInBytes;

891

893

894

895 if (TII->hasUnscaledLdStOffset(NewOpcode)) {

896 int NewOffsetStride = TII->getMemScale(NewOpcode);

897 assert(((OffsetImm % NewOffsetStride) == 0) &&

898 "Offset should be a multiple of the store memory scale");

899 OffsetImm = OffsetImm / NewOffsetStride;

900 }

901

902

905 MachineInstrBuilder MIB;

907 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)

908 .add(BaseRegOp)

911 .setMIFlags(I->mergeFlagsWith(*MergeMI));

912 (void)MIB;

913

914 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");

921

922

923 I->eraseFromParent();

924 MergeMI->eraseFromParent();

925 return NextI;

926}
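// Net effect of the rewrite above, for example:
//   str wzr, [x0]
//   str wzr, [x0, #4]
// becomes
//   str xzr, [x0]
// using the lower of the two offsets and the widened zero register.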

927

928

929

930

931static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
932 const TargetRegisterInfo *TRI, unsigned Limit,
933 std::function<bool(MachineInstr &, bool)> &Fn) {

934 auto MBB = MI.getParent();

935 for (MachineInstr &I :
936 instructionsWithoutDebug(MI.getIterator(), MBB->instr_end())) {
937 if (!Limit)

938 return false;

939 --Limit;

940

941 bool isDef = any_of(I.operands(), [&DefReg, &TRI](MachineOperand &MOP) {
942 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&

943 TRI->regsOverlap(MOP.getReg(), DefReg);

944 });

945 if (!Fn(I, isDef))

946 return false;

947 if (isDef)

948 break;

949 }

950 return true;

951}

952

953static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
954 const TargetRegisterInfo *TRI) {
955
956 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
957 if (MOP.isReg() && MOP.isKill())
958 Units.removeReg(MOP.getReg());
959
960 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
961 if (MOP.isReg() && !MOP.isKill())

962 Units.addReg(MOP.getReg());

963}

964

965

966

967

969 unsigned InstrNumToSet,

972

973

974

976 unsigned OperandNo = 0;

977 bool RegFound = false;

978 for (const auto Op : MergedInstr.operands()) {

979 if (Op.getReg() == Reg) {

980 RegFound = true;

981 break;

982 }

983 OperandNo++;

984 }

985

986 if (RegFound)

988 {InstrNumToSet, OperandNo});

989}

990

991MachineBasicBlock::iterator
992AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
993 MachineBasicBlock::iterator Paired,
994 const LdStPairFlags &Flags) {

997

998

999

1000

1001 if (NextI == Paired)

1003

1004 int SExtIdx = Flags.getSExtIdx();

1005 unsigned Opc =

1007 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);

1008 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

1009

1010 bool MergeForward = Flags.getMergeForward();

1011

1012 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();

1013 if (RenameReg) {

1015 DefinedInBB.addReg(*RenameReg);

1016

1017

1018

1019 auto GetMatchingSubReg =

1020 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {

1022 TRI->sub_and_superregs_inclusive(*RenameReg)) {

1023 if (C->contains(SubOrSuper))

1024 return SubOrSuper;

1025 }

1026 llvm_unreachable("Should have found matching sub or super register!");

1027 };

1028

1029 std::function<bool(MachineInstr &, bool)> UpdateMIs =

1030 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,

1031 bool IsDef) {

1032 if (IsDef) {

1033 bool SeenDef = false;

1034 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {

1035 MachineOperand &MOP = MI.getOperand(OpIdx);

1036

1037

1039 (!MergeForward || !SeenDef ||

1041 TRI->regsOverlap(MOP.getReg(), RegToRename)) {

1044 "Need renamable operands");

1046 if (const TargetRegisterClass *RC =

1048 MatchingReg = GetMatchingSubReg(RC);

1049 else {

1051 continue;

1052 MatchingReg = GetMatchingSubReg(

1053 TRI->getMinimalPhysRegClass(MOP.getReg()));

1054 }

1055 MOP.setReg(MatchingReg);

1056 SeenDef = true;

1057 }

1058 }

1059 } else {

1060 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {

1061 MachineOperand &MOP = MI.getOperand(OpIdx);

1063 TRI->regsOverlap(MOP.getReg(), RegToRename)) {

1066 "Need renamable operands");

1068 if (const TargetRegisterClass *RC =

1070 MatchingReg = GetMatchingSubReg(RC);

1071 else

1072 MatchingReg = GetMatchingSubReg(

1073 TRI->getMinimalPhysRegClass(MOP.getReg()));

1074 assert(MatchingReg != AArch64::NoRegister &&

1075 "Cannot find matching regs for renaming");

1076 MOP.setReg(MatchingReg);

1077 }

1078 }

1079 }

1081 return true;

1082 };

1083 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,

1084 TRI, UINT32_MAX, UpdateMIs);

1085

1086#if !defined(NDEBUG)

1087

1088

1089

1090

1091 MCPhysReg RegToCheck = *RenameReg;

1092

1093

1094

1095

1096 if (!MergeForward)

1097 RegToCheck = RegToRename;

1098 for (auto &MI :

1099 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(

1100 MergeForward ? std::next(I) : I,

1101 MergeForward ? std::next(Paired) : Paired))

1103 [this, RegToCheck](const MachineOperand &MOP) {

1104 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||

1105 MOP.isUndef() ||

1106 !TRI->regsOverlap(MOP.getReg(), RegToCheck);

1107 }) &&

1108 "Rename register used between paired instruction, trashing the "

1109 "content");

1110#endif

1111 }

1112

1113

1114

1116

1117

1118 const MachineOperand &BaseRegOp =

1120 : AArch64InstrInfo::getLdStBaseOp(*I);

1121

1124 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());

1125 if (IsUnscaled != PairedIsUnscaled) {

1126

1127

1128

1129 int MemSize = TII->getMemScale(*Paired);

1130 if (PairedIsUnscaled) {

1131

1132

1133 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&

1134 "Offset should be a multiple of the stride!");

1135 PairedOffset /= MemSize;

1136 } else {

1137 PairedOffset *= MemSize;

1138 }

1139 }

1140

1141

1142

1143

1144 MachineInstr *RtMI, *Rt2MI;

1145 if (Offset == PairedOffset + OffsetStride &&

1147 RtMI = &*Paired;

1148 Rt2MI = &*I;

1149

1150

1151

1152 if (SExtIdx != -1)

1153 SExtIdx = (SExtIdx + 1) % 2;

1154 } else {

1155 RtMI = &*I;

1156 Rt2MI = &*Paired;

1157 }

1159

1160 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {

1161 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&

1162 "Unscaled offset cannot be scaled.");

1163 OffsetImm /= TII->getMemScale(*RtMI);

1164 }

1165

1166

1167 MachineInstrBuilder MIB;

1170 MachineOperand RegOp0 = getLdStRegOp(*RtMI);

1171 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);

1172 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;

1173

1174 if (RegOp0.isUse()) {

1175 if (!MergeForward) {

1176

1177

1178

1179

1180

1181

1182

1183

1184

1185

1186 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)

1187 if (It->readsRegister(PairedRegOp.getReg(), TRI))

1189 } else {

1190

1191

1192

1193

1195 for (MachineInstr &MI :

1196 make_range(std::next(I->getIterator()), Paired->getIterator()))

1197 MI.clearRegisterKills(Reg, TRI);

1198 }

1199 }

1200

1203

1204

1207

1208 MIB.add(RegOp0)

1209 .add(RegOp1)

1210 .add(BaseRegOp)

1213 .setMIFlags(I->mergeFlagsWith(*Paired));

1214

1215 (void)MIB;

1216

1218 dbgs() << "Creating pair load/store. Replacing instructions:\n ");

1223 if (SExtIdx != -1) {

1224

1225

1226

1227

1228 MachineOperand &DstMO = MIB->getOperand(SExtIdx);

1229

1230

1232

1233 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);

1234

1235 DstMO.setReg(DstRegW);

1238

1239

1240

1241

1242 MachineInstrBuilder MIBKill =

1243 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)

1247

1248 MachineInstrBuilder MIBSXTW =

1249 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)

1253 (void)MIBSXTW;

1254

1255

1256

1257

1258

1259

1260

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1273

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286

1287

1288

1289 if (I->peekDebugInstrNum()) {

1290

1291

1292

1293

1294

1295

1296

1297 unsigned NewInstrNum;

1298 if (DstRegX == I->getOperand(0).getReg()) {

1301 *MIBSXTW);

1302 } else {

1305 }

1306 }

1307 if (Paired->peekDebugInstrNum()) {

1308

1309

1310

1311

1312

1313

1314

1315 unsigned NewInstrNum;

1316 if (DstRegX == Paired->getOperand(0).getReg()) {

1319 *MIBSXTW);

1320 } else {

1323 *MIB);

1324 }

1325 }

1326

1329 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {

1330

1331

1332 MachineOperand &MOp0 = MIB->getOperand(0);

1333 MachineOperand &MOp1 = MIB->getOperand(1);

1335 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");

1336 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));

1337 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));

1339 } else {

1340

1341

1342

1343

1344

1345

1346

1347

1348

1349

1350

1351

1352

1353

1354

1355

1356

1357

1358

1359

1360

1361

1362

1363

1364

1365

1366

1367

1368 if (I->peekDebugInstrNum()) {

1371 *MIB);

1372 }

1373 if (Paired->peekDebugInstrNum()) {

1376 *MIB);

1377 }

1378

1380 }

1382

1383 if (MergeForward)

1387

1388

1389

1390

1393 SmallSetVector<Register, 4> Ops;

1394 for (const MachineOperand &MO :

1395 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))

1396 if (MO.isReg() && MO.isImplicit() && MO.isDef())

1397 Ops.insert(MO.getReg());

1398 for (const MachineOperand &MO :

1399 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))

1400 if (MO.isReg() && MO.isImplicit() && MO.isDef())

1401 Ops.insert(MO.getReg());

1402 for (auto Op : Ops)

1404 };

1405 CopyImplicitOps(I, Paired);

1406

1407

1408 I->eraseFromParent();

1409 Paired->eraseFromParent();

1410

1411 return NextI;

1412}
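// Example including the sign-extend fixup handled above:
//   ldr w0, [x2]
//   ldrsw x1, [x2, #4]
// becomes
//   ldp w0, w1, [x2]
//   sxtw x1, w1
// where the sxtw is emitted as the KILL + SBFMXri sequence built via MIBSXTW.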

1413

1414MachineBasicBlock::iterator
1415AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1416 MachineBasicBlock::iterator StoreI) {
1417 MachineBasicBlock::iterator NextI =
1418 next_nodbg(LoadI, LoadI->getParent()->end());

1419

1420 int LoadSize = TII->getMemScale(*LoadI);

1421 int StoreSize = TII->getMemScale(*StoreI);

1423 const MachineOperand &StMO = getLdStRegOp(*StoreI);

1425 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

1426

1427 assert((IsStoreXReg ||

1428 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&

1429 "Unexpected RegClass");

1430

1431 MachineInstr *BitExtMI;

1432 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {

1433

1434

1435 if (StRt == LdRt && LoadSize == 8) {

1436 for (MachineInstr &MI : make_range(StoreI->getIterator(),

1437 LoadI->getIterator())) {

1438 if (MI.killsRegister(StRt, TRI)) {

1439 MI.clearRegisterKills(StRt, TRI);

1440 break;

1441 }

1442 }

1443 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");

1446 LoadI->eraseFromParent();

1447 return NextI;

1448 }

1449

1450 BitExtMI =

1451 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),

1452 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)

1453 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)

1454 .add(StMO)

1457 } else {

1458

1459

1461 return NextI;

1462 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);

1463 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&

1464 "Unsupported ld/st match");

1465 assert(LoadSize <= StoreSize && "Invalid load size");

1466 int UnscaledLdOffset =

1467 IsUnscaled

1470 int UnscaledStOffset =

1471 IsUnscaled

1474 int Width = LoadSize * 8;

1476 IsStoreXReg ? Register(TRI->getMatchingSuperReg(

1477 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))

1478 : LdRt;

1479

1480 assert((UnscaledLdOffset >= UnscaledStOffset &&

1481 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&

1482 "Invalid offset");

1483

1484 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);

1485 int Imms = Immr + Width - 1;

1486 if (UnscaledLdOffset == UnscaledStOffset) {

1487 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)

1488 | ((Immr) << 6)

1489 | ((Imms) << 0)

1490 ;

1491

1492 BitExtMI =

1493 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),

1494 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),

1495 DestReg)

1496 .add(StMO)

1497 .addImm(AndMaskEncoded)

1499 } else if (IsStoreXReg && Imms == 31) {

1500

1501

1502 assert(Immr <= Imms && "Expected LSR alias of UBFM");

1503 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),

1504 TII->get(AArch64::UBFMWri),

1505 TRI->getSubReg(DestReg, AArch64::sub_32))

1506 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))

1510 } else {

1511 BitExtMI =

1512 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),

1513 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),

1514 DestReg)

1515 .add(StMO)

1519 }

1520 }

1521

1522

1523 for (MachineInstr &MI : make_range(StoreI->getIterator(),

1525 if (MI.killsRegister(StRt, TRI)) {

1526 MI.clearRegisterKills(StRt, TRI);

1527 break;

1528 }

1529

1530 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");

1539

1540

1541 LoadI->eraseFromParent();

1542 return NextI;

1543}
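// Example of the bitfield-extract path above (store wider than load):
//   str w1, [x0, #4]
//   ldrh w2, [x0, #6]
// becomes
//   str w1, [x0, #4]
//   lsr w2, w1, #16
// since Immr = 8 * (6 - 4) = 16 and Imms = 16 + 16 - 1 = 31 encode a UBFM
// that is exactly the LSR alias.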

1544

1545static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1546

1547

1548 if (IsUnscaled) {

1549

1550

1551 if (Offset % OffsetStride)

1552 return false;

1553 Offset /= OffsetStride;

1554 }

1555 return Offset <= 63 && Offset >= -64;
1556}

1557

1558

1559

1560

1561

1562static int alignTo(int Num, int PowOf2) {

1563 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);

1564}
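// e.g. alignTo(10, 8) == 16 and alignTo(-10, 8) == -8: the bias-and-mask
// form rounds towards positive infinity, unlike C-style division.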

1565

1566static bool mayAlias(MachineInstr &MIa,
1567 SmallVectorImpl<MachineInstr *> &MemInsns,
1568 AliasAnalysis *AA) {
1569 for (MachineInstr *MIb : MemInsns) {
1570 if (MIa.mayAlias(AA, *MIb, false)) {

1571 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());

1572 return true;

1573 }

1574 }

1575

1577 return false;

1578}

1579

1580bool AArch64LoadStoreOpt::findMatchingStore(
1581 MachineBasicBlock::iterator I, unsigned Limit,
1582 MachineBasicBlock::iterator &StoreI) {
1583 MachineBasicBlock::iterator B = I->getParent()->begin();
1584 MachineBasicBlock::iterator MBBI = I;

1585 MachineInstr &LoadMI = *I;

1587

1588

1589

1591 return false;

1592

1593

1594

1595 ModifiedRegUnits.clear();

1596 UsedRegUnits.clear();

1597

1598 unsigned Count = 0;

1599 do {

1601 MachineInstr &MI = *MBBI;

1602

1603

1604

1605 if (MI.isTransient())

1607

1608

1609

1610

1611

1612

1613

1619 StoreI = MBBI;

1620 return true;

1621 }

1622

1623 if (MI.isCall())

1624 return false;

1625

1626

1628

1629

1630

1631 if (!ModifiedRegUnits.available(BaseReg))

1632 return false;

1633

1634

1635 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, false))

1636 return false;

1637 } while (MBBI != B && Count < Limit);

1638 return false;

1639}

1640

1641static bool needsWinCFI(const MachineFunction *MF) {
1642 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1643 MF->getFunction().needsUnwindTableEntry();
1644}
1645

1646

1647

1649 LdStPairFlags &Flags,

1651

1652 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))

1653 return false;

1654

1655

1656 assert(!FirstMI.hasOrderedMemoryRef() &&
1657 !TII->isLdStPairSuppressed(FirstMI) &&

1658 "FirstMI shouldn't get here if either of these checks are true.");

1659

1662 return false;

1663

1664 unsigned OpcA = FirstMI.getOpcode();

1665 unsigned OpcB = MI.getOpcode();

1666

1667

1668 if (OpcA == OpcB)

1670

1671

1672

1673 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||

1674 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)

1675 return false;

1676

1677

1679 return false;

1680

1681

1682 bool IsValidLdStrOpc, PairIsValidLdStrOpc;

1683 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1684 assert(IsValidLdStrOpc &&

1685 "Given Opc should be a Load or Store with an immediate");

1686

1687 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1688 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);

1689 return true;

1690 }

1691

1692

1693

1694 if (!PairIsValidLdStrOpc)

1695 return false;

1696

1697

1698

1699 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))

1702 TII->getMemScale(FirstMI) == TII->getMemScale(MI);

1703

1704

1705

1706

1708 return true;

1709

1710

1711 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&

1713

1714

1715}

1716

1717static bool canRenameMOP(const MachineOperand &MOP,
1718 const TargetRegisterInfo *TRI) {
1719 if (MOP.isReg()) {

1720 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());

1721

1722

1723

1724

1725

1726

1727 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&

1728 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||

1729 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||

1730 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {

1733 << " Cannot rename operands with multiple disjunct subregisters ("

1734 << MOP << ")\n");

1735 return false;

1736 }

1737

1738

1739

1740

1743 return false;

1744 return TRI->isSuperOrSubRegisterEq(

1746 }

1747 }

1750}

1751

1752static bool

1753canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1754 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1755 const TargetRegisterInfo *TRI) {
1756 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1757 return false;

1758

1759

1760

1761

1763

1767 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&

1768 MOP.isImplicit() && MOP.isKill() &&

1769 TRI->regsOverlap(RegToRename, MOP.getReg());

1770 })) {

1771 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);

1772 return false;

1773 }

1774

1775 bool FoundDef = false;

1776

1777

1778

1779

1780

1782 bool IsDef) {

1784

1786 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "

1787 << "currently\n");

1788 return false;

1789 }

1790

1792

1793

1794

1795 FoundDef = IsDef;

1796

1797

1798 if (FoundDef) {

1799

1800

1801

1802

1803

1804

1805

1806 if (MI.isPseudo()) {

1807 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");

1808 return false;

1809 }

1810

1811 for (auto &MOP : MI.operands()) {

1813 TRI->regsOverlap(MOP.getReg(), RegToRename))

1814 continue;

1816 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);

1817 return false;

1818 }

1819 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));

1820 }

1821 return true;

1822 } else {

1823 for (auto &MOP : MI.operands()) {

1825 TRI->regsOverlap(MOP.getReg(), RegToRename))

1826 continue;

1827

1829 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);

1830 return false;

1831 }

1832 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));

1833 }

1834 }

1835 return true;

1836 };

1837

1839 return false;

1840

1841 if (!FoundDef) {

1842 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");

1843 return false;

1844 }

1845 return true;

1846}

1847

1848

1849

1850

1851

1852

1853

1854

1855

1856

1857static bool canRenameUntilSecondLoad(
1858 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1859 LiveRegUnits &UsedInBetween,
1860 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1861 const TargetRegisterInfo *TRI) {
1862 if (FirstLoad.isPseudo())
1863 return false;

1864

1865 UsedInBetween.accumulate(FirstLoad);

1867 bool Success = std::all_of(

1870 LLVM_DEBUG(dbgs() << "Checking " << MI);

1871

1872 if (MI.getFlag(MachineInstr::FrameSetup)) {

1873 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "

1874 << "currently\n");

1875 return false;

1876 }

1877

1878 for (auto &MOP : MI.operands()) {

1879 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||

1880 !TRI->regsOverlap(MOP.getReg(), RegToRename))

1881 continue;

1882 if (!canRenameMOP(MOP, TRI)) {

1883 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);

1884 return false;

1885 }

1886 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));

1887 }

1888

1889 return true;

1890 });

1892}

1893

1894

1895

1896

1897

1898

1899

1900

1901static std::optional<MCPhysReg> tryToFindRegisterToRename(
1902 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1903 LiveRegUnits &UsedInBetween,
1904 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1905 const TargetRegisterInfo *TRI) {
1906 const MachineRegisterInfo &RegInfo = MF.getRegInfo();
1907

1908

1909 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {

1910 return any_of(TRI->sub_and_superregs_inclusive(PR),

1912 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);

1913 });

1914 };

1915

1916

1917

1918 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {

1921 TRI->sub_and_superregs_inclusive(PR),

1922 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });

1923 });

1924 };

1925

1926 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);

1927 for (const MCPhysReg &PR : *RegClass) {

1929 RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&

1930 CanBeUsedForAllClasses(PR)) {

1931 DefinedInBB.addReg(PR);

1933 << "\n");

1934 return {PR};

1935 }

1936 }

1937 LLVM_DEBUG(dbgs() << "No rename register found from "

1938 << TRI->getRegClassName(RegClass) << "\n");

1939 return std::nullopt;

1940}

1941

1942

1943

1944

1945static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1946 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1947 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1948 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1949 const TargetRegisterInfo *TRI) {
1950 std::optional<MCPhysReg> RenameReg;

1952 return RenameReg;

1953

1957 return RenameReg;

1958

1959 const bool IsLoad = FirstMI.mayLoad();

1960

1961 if (!MaybeCanRename) {

1962 if (IsLoad)

1964 RequiredClasses, TRI)};

1965 else

1966 MaybeCanRename = {

1968 }

1969

1970 if (*MaybeCanRename) {

1972 RequiredClasses, TRI);

1973 }

1974 return RenameReg;

1975}

1976

1977

1978

1979MachineBasicBlock::iterator
1980AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1981 LdStPairFlags &Flags, unsigned Limit,

1982 bool FindNarrowMerge) {

1986 MachineInstr &FirstMI = *I;

1988

1990 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);

1994 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;

1996

1997 std::optional<bool> MaybeCanRename;

1999 MaybeCanRename = {false};

2000

2001 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;

2002 LiveRegUnits UsedInBetween;

2003 UsedInBetween.init(*TRI);

2004

2005 Flags.clearRenameReg();

2006

2007

2008

2009 ModifiedRegUnits.clear();

2010 UsedRegUnits.clear();

2011

2012

2013 SmallVector<MachineInstr *, 4> MemInsns;

2014

2018 MachineInstr &MI = *MBBI;

2020

2022

2023

2024

2025 if (MI.isTransient())

2027

2028 Flags.setSExtIdx(-1);

2031 assert(MI.mayLoadOrStore() && "Expected memory operation.");

2032

2033

2034

2035

2036

2037

2040 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);

2041 if (IsUnscaled != MIIsUnscaled) {

2042

2043

2044

2045 int MemSize = TII->getMemScale(MI);

2046 if (MIIsUnscaled) {

2047

2048

2049 if (MIOffset % MemSize) {

2051 UsedRegUnits, TRI);

2053 continue;

2054 }

2055 MIOffset /= MemSize;

2056 } else {

2057 MIOffset *= MemSize;

2058 }

2059 }

2060

2062

2063 if (BaseReg == MIBaseReg) {

2064

2065

2066

2067

2068 if (IsPreLdSt) {

2069 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);

2070 bool IsBaseRegUsed = !UsedRegUnits.available(

2072 bool IsBaseRegModified = !ModifiedRegUnits.available(

2074

2075

2076

2077 bool IsMIRegTheSame =

2080 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||

2081 IsMIRegTheSame) {

2083 UsedRegUnits, TRI);

2085 continue;

2086 }

2087 } else {

2088 if ((Offset != MIOffset + OffsetStride) &&

2089 (Offset + OffsetStride != MIOffset)) {

2091 UsedRegUnits, TRI);

2093 continue;

2094 }

2095 }

2096

2097 int MinOffset = Offset < MIOffset ? Offset : MIOffset;

2098 if (FindNarrowMerge) {

2099

2100

2101

2102

2103 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||

2106 UsedRegUnits, TRI);

2108 continue;

2109 }

2110 } else {

2111

2112

2113

2114

2115 if (inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {

2117 UsedRegUnits, TRI);

2119 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "

2120 << "keep looking.\n");

2121 continue;

2122 }

2123

2124

2125

2126 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {

2128 UsedRegUnits, TRI);

2131 << "Offset doesn't fit due to alignment requirements, "

2132 << "keep looking.\n");

2133 continue;

2134 }

2135 }

2136

2137

2138

2139

2140

2141

2142

2143 if (!ModifiedRegUnits.available(BaseReg))

2144 return E;

2145

2146 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(

2148

2149

2150

2151

2152

2153 bool RtNotModified =

2155 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&

2157

2158 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"

2159 << "Reg '" << getLdStRegOp(MI) << "' not modified: "

2160 << (RtNotModified ? "true" : "false") << "\n"

2162 << (RtNotUsed ? "true" : "false") << "\n");

2163

2164 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {

2165

2166

2167

2168 if (SameLoadReg) {

2169 std::optional<MCPhysReg> RenameReg =

2171 Reg, DefinedInBB, UsedInBetween,

2172 RequiredClasses, TRI);

2173 if (!RenameReg) {

2175 UsedRegUnits, TRI);

2177 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "

2178 << "keep looking.\n");

2179 continue;

2180 }

2181 Flags.setRenameReg(*RenameReg);

2182 }

2183

2184 Flags.setMergeForward(false);

2185 if (!SameLoadReg)

2186 Flags.clearRenameReg();

2187 return MBBI;

2188 }

2189

2190

2191

2192

2193

2194 RtNotModified = !(

2196

2197 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"

2199 << "' not modified: "

2200 << (RtNotModified ? "true" : "false") << "\n");

2201

2202 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {

2204 Flags.setMergeForward(true);

2205 Flags.clearRenameReg();

2206 return MBBI;

2207 }

2208

2210 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,

2211 RequiredClasses, TRI);

2212 if (RenameReg) {

2213 Flags.setMergeForward(true);

2214 Flags.setRenameReg(*RenameReg);

2215 MBBIWithRenameReg = MBBI;

2216 }

2217 }

2218 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "

2219 << "interference in between, keep looking.\n");

2220 }

2221 }

2222

2223 if (Flags.getRenameReg())

2224 return MBBIWithRenameReg;

2225

2226

2227

2228 if (MI.isCall()) {

2229 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");

2230 return E;

2231 }

2232

2233

2235

2236

2237

2238 if (!ModifiedRegUnits.available(BaseReg)) {

2239 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");

2240 return E;

2241 }

2242

2243

2244 if (MI.mayLoadOrStore())

2246 }

2247 return E;

2248}

2249

2252 assert((MI.getOpcode() == AArch64::SUBXri ||

2253 MI.getOpcode() == AArch64::ADDXri) &&

2254 "Expected a register update instruction");

2255 auto End = MI.getParent()->end();

2256 if (MaybeCFI == End ||

2257 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||

2260 MI.getOperand(0).getReg() != AArch64::SP)

2261 return End;

2262

2264 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();

2269 return MaybeCFI;

2270 default:

2271 return End;

2272 }

2273}

2274

2275std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(

2277 bool IsForward, bool IsPreIdx, bool MergeEither) {

2278 assert((Update->getOpcode() == AArch64::ADDXri ||

2279 Update->getOpcode() == AArch64::SUBXri) &&

2280 "Unexpected base register update instruction to merge!");

2283

2284

2285

2286

2287

2289 if (IsForward) {

2292 if (MergeEither) {

2293 InsertPt = Update;

2294 } else {

2295

2296 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {

2297 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;

2298 }))

2299 return std::nullopt;

2300

2301 MachineBasicBlock *MBB = InsertPt->getParent();

2303 }

2304 }

2305 }

2306

2307

2308

2309

2310 if (NextI == Update)

2312

2313 int Value = Update->getOperand(2).getImm();

2315 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");

2316 if (Update->getOpcode() == AArch64::SUBXri)

2318

2321 MachineInstrBuilder MIB;

2322 int Scale, MinOffset, MaxOffset;

2325

2326 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),

2328 .add(Update->getOperand(0))

2333 .setMIFlags(I->mergeFlagsWith(*Update));

2334 } else {

2335

2336 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),

2338 .add(Update->getOperand(0))

2344 .setMIFlags(I->mergeFlagsWith(*Update));

2345 }

2346

2347 if (IsPreIdx) {

2348 ++NumPreFolded;

2349 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");

2350 } else {

2351 ++NumPostFolded;

2352 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");

2353 }

2354 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");

2361

2362

2363 I->eraseFromParent();

2364 Update->eraseFromParent();

2365

2366 return NextI;

2367}

2368

2372 unsigned Offset, int Scale) {

2373 assert((Update->getOpcode() == AArch64::MOVKWi) &&

2374 "Unexpected const mov instruction to merge!");

2378 MachineInstr &MemMI = *I;

2379 unsigned Mask = (1 << 12) * Scale - 1;

2384 MachineInstrBuilder AddMIB, MemMIB;

2385

2386

2387 AddMIB =

2388 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))

2391 .addImm(High >> 12)

2392 .addImm(12);

2393 (void)AddMIB;

2394

2396 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))

2401 .setMIFlags(I->mergeFlagsWith(*Update));

2402 (void)MemMIB;

2403

2404 ++NumConstOffsetFolded;

2405 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");

2406 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");

2417

2418

2419 I->eraseFromParent();

2420 PrevI->eraseFromParent();

2421 Update->eraseFromParent();

2422

2423 return NextI;

2424}
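// Illustrative fold performed above (constants chosen for the example):
//   mov  w8, #0xde78
//   movk w8, #0xf, lsl #16
//   ldrb w0, [x0, x8]
// becomes
//   add  x0, x0, #0xfd, lsl #12
//   ldrb w0, [x0, #0xe78]
// i.e. the bits of the constant above the 12-bit immediate field go into an
// ADDXri with LSL #12 and the remainder is folded into the memory offset.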

2425

2426bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,

2427 MachineInstr &MI,

2428 unsigned BaseReg, int Offset) {

2429 switch (MI.getOpcode()) {

2430 default:

2431 break;

2432 case AArch64::SUBXri:

2433 case AArch64::ADDXri:

2434

2435

2436 if (MI.getOperand(2).isImm())

2437 break;

2438

2440 break;

2441

2442

2443

2444 if (MI.getOperand(0).getReg() != BaseReg ||

2445 MI.getOperand(1).getReg() != BaseReg)

2446 break;

2447

2448 int UpdateOffset = MI.getOperand(2).getImm();

2449 if (MI.getOpcode() == AArch64::SUBXri)

2450 UpdateOffset = -UpdateOffset;

2451

2452

2453

2454 int Scale, MinOffset, MaxOffset;

2456 if (UpdateOffset % Scale != 0)

2457 break;

2458

2459

2460 int ScaledOffset = UpdateOffset / Scale;

2461 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)

2462 break;

2463

2464

2465

2467 return true;

2468 break;

2469 }

2470 return false;

2471}

2472

2473bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,

2474 MachineInstr &MI,

2475 unsigned IndexReg,

2477

2478

2479 if (MI.getOpcode() == AArch64::MOVKWi &&

2480 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

2481

2482

2485

2487 return false;

2489 MachineInstr &MovzMI = *MBBI;

2490

2491 if (MovzMI.getOpcode() == AArch64::MOVZWi &&

2494 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();

2496

2497 return Offset >> 24 == 0;

2498 }

2499 }

2500 return false;

2501}

2502

2506 MachineInstr &MemMI = *I;

2508

2511 TII->getMemScale(MemMI);

2512

2513

2514

2515

2516 if (MIUnscaledOffset != UnscaledOffset)

2517 return E;

2518

2519

2520

2521

2522

2523

2524

2527 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {

2529 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))

2530 return E;

2531 }

2532 }

2533

2534

2535

2536 ModifiedRegUnits.clear();

2537 UsedRegUnits.clear();

2539

2540

2541

2542

2543 const bool BaseRegSP = BaseReg == AArch64::SP;

2545

2546

2547

2548 return E;

2549 }

2550

2551 unsigned Count = 0;

2552 MachineBasicBlock *CurMBB = I->getParent();

2553

2555

2556 while (true) {

2559 MachineInstr &MI = *MBBI;

2560

2561

2562

2563 if (MI.isTransient())

2565

2566

2567 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))

2568 return MBBI;

2569

2570

2573

2574

2575

2576

2577

2578 if (!ModifiedRegUnits.available(BaseReg) ||

2579 !UsedRegUnits.available(BaseReg) ||

2580 (BaseRegSP && MBBI->mayLoadOrStore()))

2581 return E;

2582 }

2583

2584 if (!VisitSucc || Limit <= Count)

2585 break;

2586

2587

2588

2589 MachineBasicBlock *SuccToVisit = nullptr;

2590 unsigned LiveSuccCount = 0;

2591 for (MachineBasicBlock *Succ : CurMBB->successors()) {

2592 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {

2593 if (Succ->isLiveIn(*AI)) {

2594 if (LiveSuccCount++)

2595 return E;

2596 if (Succ->pred_size() == 1)

2597 SuccToVisit = Succ;

2598 break;

2599 }

2600 }

2601 }

2602 if (!SuccToVisit)

2603 break;

2604 CurMBB = SuccToVisit;

2606 }

2607

2608 return E;

2609}

2610

2615 MachineInstr &MemMI = *I;

2617 MachineFunction &MF = *MemMI.getMF();

2618

2621

2625 : AArch64::NoRegister};

2626

2627

2628

2630 return E;

2631

2632

2634 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)

2635 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))

2636 return E;

2637 }

2638

2639 const bool BaseRegSP = BaseReg == AArch64::SP;

2641

2642

2643

2644 return E;

2645 }

2646

2647 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

2648 unsigned RedZoneSize =

2650

2651

2652

2653 ModifiedRegUnits.clear();

2654 UsedRegUnits.clear();

2655 unsigned Count = 0;

2656 bool MemAccessBeforeSPPreInc = false;

2657 MergeEither = true;

2658 do {

2660 MachineInstr &MI = *MBBI;

2661

2662

2663

2664 if (MI.isTransient())

2666

2667

2668 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {

2669

2670

2671 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)

2672 return E;

2673 return MBBI;

2674 }

2675

2676

2678

2679

2680

2681 if (!ModifiedRegUnits.available(BaseReg) ||

2682 !UsedRegUnits.available(BaseReg))

2683 return E;

2684

2685

2686

2687

2688

2689 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||

2690 (DestReg[0] != AArch64::NoRegister &&

2691 !(ModifiedRegUnits.available(DestReg[0]) &&

2692 UsedRegUnits.available(DestReg[0]))) ||

2693 (DestReg[1] != AArch64::NoRegister &&

2694 !(ModifiedRegUnits.available(DestReg[1]) &&

2695 UsedRegUnits.available(DestReg[1]))))

2696 MergeEither = false;

2697

2698

2699

2700

2701 if (BaseRegSP && MBBI->mayLoadOrStore())

2702 MemAccessBeforeSPPreInc = true;

2703 } while (MBBI != B && Count < Limit);

2704 return E;

2705}

2706

2708AArch64LoadStoreOpt::findMatchingConstOffsetBackward(

2712 MachineInstr &MemMI = *I;

2714

2715

2716

2718 return E;

2719

2720

2721

2725 return E;

2726

2728

2729

2730

2731 ModifiedRegUnits.clear();

2732 UsedRegUnits.clear();

2733 unsigned Count = 0;

2734 do {

2736 MachineInstr &MI = *MBBI;

2737

2738

2739

2740 if (MI.isTransient())

2742

2743

2744 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {

2745 return MBBI;

2746 }

2747

2748

2750

2751

2752

2753 if (!ModifiedRegUnits.available(IndexReg) ||

2754 !UsedRegUnits.available(IndexReg))

2755 return E;

2756

2757 } while (MBBI != B && Count < Limit);

2758 return E;

2759}

2760

2761bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(

2763 MachineInstr &MI = *MBBI;

2764

2765 if (MI.hasOrderedMemoryRef())

2766 return false;

2767

2769 return false;

2770

2771

2772

2774 return false;

2775

2776

2778 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {

2779 ++NumLoadsFromStoresPromoted;

2780

2781

2782

2783 MBBI = promoteLoadFromStore(MBBI, StoreI);

2784 return true;

2785 }

2786 return false;

2787}

2788

2789

2790bool AArch64LoadStoreOpt::tryToMergeZeroStInst(

2793 MachineInstr &MI = *MBBI;

2795

2796 if (!TII->isCandidateToMergeOrPair(MI))

2797 return false;

2798

2799

2800 LdStPairFlags Flags;

2802 findMatchingInsn(MBBI, Flags, LdStLimit, true);

2803 if (MergeMI != E) {

2804 ++NumZeroStoresPromoted;

2805

2806

2807

2808 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);

2809 return true;

2810 }

2811 return false;

2812}

2813

2814

2815

2817 MachineInstr &MI = *MBBI;

2819

2820 if (!TII->isCandidateToMergeOrPair(MI))

2821 return false;

2822

2823

2824 if (MI.mayLoad() && Subtarget->hasDisableLdp())

2825 return false;

2826

2827

2828 if (MI.mayStore() && Subtarget->hasDisableStp())

2829 return false;

2830

2831

2832

2833

2834 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);

2836 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;

2837

2839 Offset -= OffsetStride;

2841 return false;

2842

2843

2844 LdStPairFlags Flags;

2846 findMatchingInsn(MBBI, Flags, LdStLimit, false);

2847 if (Paired != E) {

2848

2849

2850 auto Prev = std::prev(MBBI);

2851

2852

2853

2854 MachineMemOperand *MemOp =

2855 MI.memoperands_empty() ? nullptr : MI.memoperands().front();

2856

2857

2858

2859

2860 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||

2861 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {

2862

2863 if (!MemOp || !MemOp->getMemoryType().isValid()) {

2864 NumFailedAlignmentCheck++;

2865 return false;

2866 }

2867

2868

2869

2870 uint64_t MemAlignment = MemOp->getAlign().value();

2871 uint64_t TypeAlignment =

2872 Align(MemOp->getSize().getValue().getKnownMinValue()).value();

2873

2874 if (MemAlignment < 2 * TypeAlignment) {

2875 NumFailedAlignmentCheck++;

2876 return false;

2877 }

2878 }

2879

2880 ++NumPairCreated;

2881 if (TII->hasUnscaledLdStOffset(MI))

2882 ++NumUnscaledPairCreated;

2883

2884 MBBI = mergePairedInsns(MBBI, Paired, Flags);

2885

2886

2887 for (auto I = std::next(Prev); I != MBBI; I++)

2889

2890 return true;

2891 }

2892 return false;

2893}

2894

2895bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2896 (MachineBasicBlock::iterator &MBBI) {

2897 MachineInstr &MI = *MBBI;

2900

2901

2902

2903

2904

2905
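// Look forward to try to form a post-index instruction. For example,
//   ldr x0, [x20]
//   add x20, x20, #32
// is merged into
//   ldr x0, [x20], #32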

2906 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);

2907 if (Update != E) {

2908

2909 if (auto NextI = mergeUpdateInsn(MBBI, Update, false,

2910 false,

2911 false)) {

2912 MBBI = *NextI;

2913 return true;

2914 }

2915 }

2916

2917

2918 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))

2919 return false;

2920

2921

2922

2923

2924

2925
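// Look back to try to find a pre-index instruction. For example,
//   add x0, x0, #8
//   ldr x1, [x0]
// is merged into
//   ldr x1, [x0, #8]!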

2926 bool MergeEither;

2927 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);

2928 if (Update != E) {

2929

2930 if (auto NextI = mergeUpdateInsn(MBBI, Update, true,

2931 true, MergeEither)) {

2932 MBBI = *NextI;

2933 return true;

2934 }

2935 }

2936

2937

2938

2939

2940 int UnscaledOffset =

2942

2943

2944

2945

2946

2947
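// Look forward to try to find a pre-index instruction. For example,
//   ldr x1, [x0, #64]
//   add x0, x0, #64
// is merged into
//   ldr x1, [x0, #64]!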

2948 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);

2949 if (Update != E) {

2950

2951 if (auto NextI = mergeUpdateInsn(MBBI, Update, false,

2952 true,

2953 false)) {

2954 MBBI = *NextI;

2955 return true;

2956 }

2957 }

2958

2959 return false;

2960}

2961

2963 int Scale) {

2964 MachineInstr &MI = *MBBI;

2967

2968

2969 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))

2970 return false;

2971

2972

2973

2974

2975

2976

2977

2978

2981 if (Update != E && (Offset & (Scale - 1)) == 0) {

2982

2983 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);

2984 return true;

2985 }

2986

2987 return false;

2988}

2989

2990bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,

2991 bool EnableNarrowZeroStOpt) {

2992 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();

2993

2995

2996

2997

2998

2999

3000

3001

3002

3003

3004
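// 1) Find loads that directly read from stores and promote them by
//    replacing with mov instructions. If the store is wider than the load,
//    the load will be replaced with a bitfield extract. For example:
//      str w1, [x0, #4]
//      ldrh w2, [x0, #6]
//    becomes
//      str w1, [x0, #4]
//      lsr w2, w1, #16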

3009 else

3011 }

3012

3013

3014

3015

3016

3017

3018

3019

3020

3021

3022
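// 2) Merge adjacent zero stores into a wider store. For example:
//      strh wzr, [x0]
//      strh wzr, [x0, #2]
//    becomes
//      str wzr, [x0]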

3023 if (EnableNarrowZeroStOpt)

3028 else

3030 }

3031

3032

3033

3034

3035

3036

3037

3038

3039

3040

3041

3042

3043

3044

3045

3047 DefinedInBB.clear();

3049 }

3050
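// 3) Find loads and stores that can be merged into a single load or store
//    pair instruction. For example:
//      ldr x0, [x2]
//      ldr x1, [x2, #8]
//    becomes
//      ldp x0, x1, [x2]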

3053

3054

3056 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))

3058 else

3060 }

3061

3062

3063

3064

3065

3066

3067
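// 4) Find base register updates that can be merged into the load or store
//    as a base-register writeback. For example:
//      ldr x0, [x2]
//      add x2, x2, #4
//    becomes
//      ldr x0, [x2], #4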

3072 else

3074 }

3075

3076

3077

3078

3079

3080

3081

3082
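// 5) Find a register assigned with a const value that can be combined with
//    the load or store, folding the high bits into an add. For example:
//      mov  x8, #LargeImm        ; = a * (1 << 12) + imm12
//      ldr  x9, [x0, x8]
//    becomes
//      add  x0, x0, a * (1 << 12)
//      ldr  x9, [x0, imm12]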

3085 int Scale;

3088 else

3090 }

3091

3093}

3094

3095bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {

3097 return false;

3098

3099 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();

3102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

3103

3104

3105

3106

3107 ModifiedRegUnits.init(*TRI);

3108 UsedRegUnits.init(*TRI);

3110

3112 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();

3113 for (auto &MBB : Fn) {

3116 }

3117

3119}

3120

3121

3122

3123

3124

3125

3126

3127

3128

3129

3130

3131

3132

3134 return new AArch64LoadStoreOpt();

3135}


static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)

static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)

Definition AArch64LoadStoreOptimizer.cpp:682

static bool isPromotableLoadFromStore(MachineInstr &MI)

Definition AArch64LoadStoreOptimizer.cpp:718

static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)

Definition AArch64LoadStoreOptimizer.cpp:664

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)

Definition AArch64LoadStoreOptimizer.cpp:1545

static unsigned getMatchingPairOpcode(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:348

static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)

Definition AArch64LoadStoreOptimizer.cpp:1648

static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:1901

static bool needsWinCFI(const MachineFunction *MF)

Definition AArch64LoadStoreOptimizer.cpp:1641

static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:1857

static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:1945

static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)

Definition AArch64LoadStoreOptimizer.cpp:1566

static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)

static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:1717

static unsigned getPreIndexedOpcode(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:442

#define AARCH64_LOAD_STORE_OPT_NAME

Definition AArch64LoadStoreOptimizer.cpp:89

static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)

This function will add a new entry into the debugValueSubstitutions table when two instruction have b...

Definition AArch64LoadStoreOptimizer.cpp:968

static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)

static bool isPromotableZeroStoreInst(MachineInstr &MI)

Definition AArch64LoadStoreOptimizer.cpp:711

static unsigned getMatchingWideOpcode(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:329

static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)

Definition AArch64LoadStoreOptimizer.cpp:274

static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)

Definition AArch64LoadStoreOptimizer.cpp:2251

static bool isTagStore(const MachineInstr &MI)

Definition AArch64LoadStoreOptimizer.cpp:262

static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)

Definition AArch64LoadStoreOptimizer.cpp:412

static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)

Definition AArch64LoadStoreOptimizer.cpp:931

static bool isRewritableImplicitDef(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:836

static unsigned getPostIndexedOpcode(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:551

static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)

Definition AArch64LoadStoreOptimizer.cpp:736

static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)

Definition AArch64LoadStoreOptimizer.cpp:693

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)

Definition AArch64LoadStoreOptimizer.cpp:630

static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)

Definition AArch64LoadStoreOptimizer.cpp:802

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:953

static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)

Definition AArch64LoadStoreOptimizer.cpp:1753

static unsigned getBaseAddressOpcode(unsigned Opc)

Definition AArch64LoadStoreOptimizer.cpp:515

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

MachineBasicBlock MachineBasicBlock::iterator MBBI

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file provides an implementation of debug counters.

#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Register const TargetRegisterInfo * TRI

Promote Memory to Register

static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)

MachineInstr unsigned OpIdx

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...

static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)

Returns the immediate offset operator of a load/store.

static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)

Returns the shift amount operator of a load/store.

static bool isPreLdSt(const MachineInstr &MI)

Returns whether the instruction is a pre-indexed load/store.

static bool isPairedLdSt(const MachineInstr &MI)

Returns whether the instruction is a paired load/store.

static int getMemScale(unsigned Opc)

Scaling factor for (scaled or unscaled) load or store.

static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)

Returns the base register operator of a load/store.

const AArch64RegisterInfo * getRegisterInfo() const override

const AArch64InstrInfo * getInstrInfo() const override

const AArch64TargetLowering * getTargetLowering() const override

bool isLittleEndian() const

unsigned getRedZoneSize(const Function &F) const

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

static bool shouldExecute(CounterInfo &Counter)

FunctionPass class - This class is used to implement most global optimizations.

bool needsUnwindTableEntry() const

True if this function needs an unwind table.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

A set of register units used to track register liveness.

static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)

For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...

bool available(MCRegister Reg) const

Returns true if no part of physical register Reg is live.

void init(const TargetRegisterInfo &TRI)

Initialize and clear the set.

void addReg(MCRegister Reg)

Adds register units covered by physical register Reg.

void removeReg(MCRegister Reg)

Removes all register units covered by physical register Reg.

LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)

Adds registers living into block MBB.

void clear()

Clears the set.

LLVM_ABI void accumulate(const MachineInstr &MI)

Adds all register units used, defined or clobbered in MI.

An instruction for reading from memory.

bool usesWindowsCFI() const

OpType getOperation() const

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< succ_iterator > successors()

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

Properties which a MachineFunction may have at a given point in time.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

const std::vector< MCCFIInstruction > & getFrameInstructions() const

Returns a reference to a list of cfi instructions in the function's prologue.

void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)

Create a substitution between one <instr,operand> value to a different, new value.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const

const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register use operand.

const MachineInstrBuilder & setMIFlags(unsigned Flags) const

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const

Returns true if this instruction's memory access aliases the memory access of Other.

unsigned peekDebugInstrNum() const

Examine the instruction number of this MachineInstr.

bool mayLoad(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly read memory.

LLVM_ABI bool hasOrderedMemoryRef() const

Return true if this instruction may have an ordered or volatile memory reference, or if the informati...

LLVM_ABI const MachineFunction * getMF() const

Return the function that contains the basic block that this instruction belongs to.

bool mayStore(QueryType Type=AnyInBundle) const

Return true if this instruction could possibly modify memory.

bool isPseudo(QueryType Type=IgnoreBundle) const

Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.

LLVM_ABI void dump() const

LLVM_ABI unsigned getDebugInstrNum()

Fetch the instruction number of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

void setImplicit(bool Val=true)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

void setIsKill(bool Val=true)

LLVM_ABI bool isRenamable() const

isRenamable - Returns true if this register may be renamed, i.e.

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

bool isEarlyClobber() const

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

bool tracksLiveness() const

tracksLiveness - Returns true when tracking register liveness accurately.

Wrapper class representing virtual and physical registers.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e.

const MCAsmInfo * getMCAsmInfo() const

Return target specific asm information.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

self_iterator getIterator()

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Abstract Attribute helper functions.

static unsigned getShiftValue(unsigned Imm)

getShiftValue - Extract the shift value.

static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)

getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ Implicit

Not emitted register (e.g. carry, or temporary result).

@ Define

Register definition.

initializer< Ty > init(const Ty &Val)

BaseReg

Stack frame base register. Bit 0 of FREInfo.Info.

This is an optimization pass for GlobalISel generic memory operations.

IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)

Increment It, then continue incrementing it while it points to a debug instruction.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

@ Low

Lower the current thread's priority such that it does not affect foreground tasks significantly.

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)

Returns an iterator range over all physical register and mask operands for MI and bundled instruction...

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

FunctionPass * createAArch64LoadStoreOptimizationPass()

createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.

Definition AArch64LoadStoreOptimizer.cpp:3133

auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)

Construct a range iterator which begins at It and moves forwards until End is reached,...

iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

AAResults AliasAnalysis

Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.

IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)

Decrement It, then continue decrementing it while it points to a debug instruction.

LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)

Prints virtual and physical registers with or without a TRI instance.