LLVM: lib/Target/ARM/ARMLoadStoreOptimizer.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

63#include

64#include

65#include

66#include

67#include

68#include

69

70using namespace llvm;

71

72#define DEBUG_TYPE "arm-ldst-opt"

73

// Pass statistics. The *Gened counters are incremented in this file's
// opcode-selection switch (the `++Num...Gened;` statements under each
// single load/store opcode case). The LDRD2*/STRD2* counters are incremented
// where an LDRD/STRD is rewritten into an LDM/STM or into two single
// loads/stores. NOTE(review): no increment of NumLdStMoved, NumLDRDFormed or
// NumSTRDFormed is visible in this part of the file.
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");

75STATISTIC(NumSTMGened , "Number of stm instructions generated");

76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");

77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");

78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");

79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");

80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");

81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");

82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");

83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");

84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

85

86

87

88

89

90

93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

94

95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

96

97namespace {

98

99

100

102 static char ID;

103

113 bool LiveRegsValid;

114 bool RegClassInfoValid;

115 bool isThumb1, isThumb2;

116

118

120

123 MachineFunctionProperties::Property::NoVRegs);

124 }

125

127

128 private:

129

130

/// One queued memory operation: the instruction together with the offset and
/// position values handed to the constructor. FormCandidates() reads all
/// three (MI, Offset, Position) when it groups queue entries.
/// NOTE(review): the declaration of the `MI` member that the constructor
/// initialises is not present in this listing (listing line 132 is missing).
131 struct MemOpQueueEntry {

 /// Offset value supplied by the caller for this memory operation.
133 int Offset;

 /// Position value supplied by the caller; FormCandidates() compares these
 /// to decide which entry of a group is "latest" and which is "earliest".
134 unsigned Position;

135

 /// Stores the address of \p MI and copies \p Offset and \p Position.
136 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)

137 : MI(&MI), Offset(Offset), Position(Position) {}

138 };

140

141

142

/// A group of queued memory operations that FormCandidates() considers for
/// merging into a single instruction.
/// NOTE(review): the declaration of the `Instrs` member (the list that
/// FormCandidates() fills with push_back and MergeOpsUpdate() indexes) is not
/// present in this listing (listing line 145 is missing).
143 struct MergeCandidate {

144

146

147

 /// Index into Instrs of the entry with the smallest queue Position, as
 /// computed by FormCandidates(). MergeOpsUpdate() inserts the merged
 /// instruction right after Instrs[LatestMIIdx].
148 unsigned LatestMIIdx;

149

150

 /// Index into Instrs of the entry with the largest queue Position, as
 /// computed by FormCandidates(). MergeOpsUpdate() starts its fix-up range
 /// from Instrs[EarliestMIIdx].
151 unsigned EarliestMIIdx;

152

153

154

 /// Queue Position of the entry selected as "latest" (set from
 /// MemOps[Latest].Position in FormCandidates()).
155 unsigned InsertPos;

156

157

 /// True when the group may be turned into a load/store-multiple;
 /// MergeOpsUpdate() checks it before calling CreateLoadStoreMulti().
158 bool CanMergeToLSMulti;

159

160

 /// True when the group may be turned into a load/store-double;
 /// MergeOpsUpdate() checks it before calling CreateLoadStoreDouble().
161 bool CanMergeToLSDouble;

162 };

166

172 unsigned Base, unsigned WordOffset,

176 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,

178 ArrayRef<std::pair<unsigned, bool>> Regs,

182 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,

184 ArrayRef<std::pair<unsigned, bool>> Regs,

186 void FormCandidates(const MemOpQueue &MemOps);

187 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);

192 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;

196 };

197

198}

199

200char ARMLoadStoreOpt::ID = 0;

201

203 false)

204

206 for (const auto &MO : MI.operands()) {

207 if (!MO.isReg())

208 continue;

209 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())

210

211

212 return true;

213 }

214

215 return false;

216}

217

219 unsigned Opcode = MI.getOpcode();

220 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;

221 unsigned NumOperands = MI.getDesc().getNumOperands();

222 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();

223

224 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||

225 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||

226 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||

227 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)

228 return OffField;

229

230

231 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||

232 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)

233 return OffField * 4;

234

239

242

244}

245

247 return MI.getOperand(1);

248}

249

251 return MI.getOperand(0);

252}

253

255 switch (Opcode) {

257 case ARM::LDRi12:

258 ++NumLDMGened;

259 switch (Mode) {

265 }

266 case ARM::STRi12:

267 ++NumSTMGened;

268 switch (Mode) {

274 }

275 case ARM::tLDRi:

276 case ARM::tLDRspi:

277

278

279 ++NumLDMGened;

280 switch (Mode) {

283 }

284 case ARM::tSTRi:

285 case ARM::tSTRspi:

286

287 ++NumSTMGened;

288 switch (Mode) {

290 case ARM_AM::ia: return ARM::tSTMIA_UPD;

291 }

292 case ARM::t2LDRi8:

293 case ARM::t2LDRi12:

294 ++NumLDMGened;

295 switch (Mode) {

297 case ARM_AM::ia: return ARM::t2LDMIA;

298 case ARM_AM::db: return ARM::t2LDMDB;

299 }

300 case ARM::t2STRi8:

301 case ARM::t2STRi12:

302 ++NumSTMGened;

303 switch (Mode) {

305 case ARM_AM::ia: return ARM::t2STMIA;

306 case ARM_AM::db: return ARM::t2STMDB;

307 }

308 case ARM::VLDRS:

309 ++NumVLDMGened;

310 switch (Mode) {

312 case ARM_AM::ia: return ARM::VLDMSIA;

313 case ARM_AM::db: return 0;

314 }

315 case ARM::VSTRS:

316 ++NumVSTMGened;

317 switch (Mode) {

319 case ARM_AM::ia: return ARM::VSTMSIA;

320 case ARM_AM::db: return 0;

321 }

322 case ARM::VLDRD:

323 ++NumVLDMGened;

324 switch (Mode) {

326 case ARM_AM::ia: return ARM::VLDMDIA;

327 case ARM_AM::db: return 0;

328 }

329 case ARM::VSTRD:

330 ++NumVSTMGened;

331 switch (Mode) {

333 case ARM_AM::ia: return ARM::VSTMDIA;

334 case ARM_AM::db: return 0;

335 }

336 }

337}

338

340 switch (Opcode) {

342 case ARM::LDMIA_RET:

343 case ARM::LDMIA:

344 case ARM::LDMIA_UPD:

345 case ARM::STMIA:

346 case ARM::STMIA_UPD:

347 case ARM::tLDMIA:

348 case ARM::tLDMIA_UPD:

349 case ARM::tSTMIA_UPD:

350 case ARM::t2LDMIA_RET:

351 case ARM::t2LDMIA:

352 case ARM::t2LDMIA_UPD:

353 case ARM::t2STMIA:

354 case ARM::t2STMIA_UPD:

355 case ARM::VLDMSIA:

356 case ARM::VLDMSIA_UPD:

357 case ARM::VSTMSIA:

358 case ARM::VSTMSIA_UPD:

359 case ARM::VLDMDIA:

360 case ARM::VLDMDIA_UPD:

361 case ARM::VSTMDIA:

362 case ARM::VSTMDIA_UPD:

364

365 case ARM::LDMDA:

366 case ARM::LDMDA_UPD:

367 case ARM::STMDA:

368 case ARM::STMDA_UPD:

370

371 case ARM::LDMDB:

372 case ARM::LDMDB_UPD:

373 case ARM::STMDB:

374 case ARM::STMDB_UPD:

375 case ARM::t2LDMDB:

376 case ARM::t2LDMDB_UPD:

377 case ARM::t2STMDB:

378 case ARM::t2STMDB_UPD:

379 case ARM::VLDMSDB_UPD:

380 case ARM::VSTMSDB_UPD:

381 case ARM::VLDMDDB_UPD:

382 case ARM::VSTMDDB_UPD:

384

385 case ARM::LDMIB:

386 case ARM::LDMIB_UPD:

387 case ARM::STMIB:

388 case ARM::STMIB_UPD:

390 }

391}

392

394 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;

395}

396

398 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;

399}

400

403}

404

406 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;

407}

408

410 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;

411}

412

415}

416

418 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;

419}

420

422 switch (Opc) {

424 case ARM::tLDRi:

425 case ARM::tSTRi:

426 case ARM::tLDRspi:

427 case ARM::tSTRspi:

428 return 1;

429 case ARM::tLDRHi:

430 case ARM::tSTRHi:

431 return 2;

432 case ARM::tLDRBi:

433 case ARM::tSTRBi:

434 return 4;

435 }

436}

437

439 switch (MI->getOpcode()) {

440 default: return 0;

441 case ARM::LDRi12:

442 case ARM::STRi12:

443 case ARM::tLDRi:

444 case ARM::tSTRi:

445 case ARM::tLDRspi:

446 case ARM::tSTRspi:

447 case ARM::t2LDRi8:

448 case ARM::t2LDRi12:

449 case ARM::t2STRi8:

450 case ARM::t2STRi12:

451 case ARM::VLDRS:

452 case ARM::VSTRS:

453 return 4;

454 case ARM::VLDRD:

455 case ARM::VSTRD:

456 return 8;

457 case ARM::LDMIA:

458 case ARM::LDMDA:

459 case ARM::LDMDB:

460 case ARM::LDMIB:

461 case ARM::STMIA:

462 case ARM::STMDA:

463 case ARM::STMDB:

464 case ARM::STMIB:

465 case ARM::tLDMIA:

466 case ARM::tLDMIA_UPD:

467 case ARM::tSTMIA_UPD:

468 case ARM::t2LDMIA:

469 case ARM::t2LDMDB:

470 case ARM::t2STMIA:

471 case ARM::t2STMDB:

472 case ARM::VLDMSIA:

473 case ARM::VSTMSIA:

474 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;

475 case ARM::VLDMDIA:

476 case ARM::VSTMDIA:

477 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;

478 }

479}

480

481

482

486 unsigned WordOffset,

488 unsigned PredReg) {

489 assert(isThumb1 && "Can only update base register uses for Thumb1!");

490

491

493 bool InsertSub = false;

494 unsigned Opc = MBBI->getOpcode();

495

496 if (MBBI->readsRegister(Base, nullptr)) {

498 bool IsLoad =

499 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;

500 bool IsStore =

501 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

502

503 if (IsLoad || IsStore) {

504

505

506

507

509 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);

510

512

513

515

516 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))

518 else

519 InsertSub = true;

520 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&

521 !definesCPSR(*MBBI)) {

522

523

524

526 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);

527 Offset = (Opc == ARM::tSUBi8) ?

528 MO.getImm() + WordOffset * 4 :

529 MO.getImm() - WordOffset * 4 ;

530 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {

531

532

534

535 return;

536 } else {

537 InsertSub = true;

538 }

539 } else {

540

541 InsertSub = true;

542 }

543 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {

544

545

546

547 InsertSub = true;

548 }

549

550 if (InsertSub) {

551

555 .addImm(WordOffset * 4)

558 return;

559 }

560

561 if (MBBI->killsRegister(Base, nullptr) ||

562 MBBI->definesRegister(Base, nullptr))

563

564 return;

565 }

566

567

569

570

571

572

577 .addImm(WordOffset * 4)

580 }

581}

582

583

584unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {

585 if (!RegClassInfoValid) {

586 RegClassInfo.runOnMachineFunction(*MF);

587 RegClassInfoValid = true;

588 }

589

590 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))

591 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))

592 return Reg;

593 return 0;

594}

595

596

597

598

601

602 if (!LiveRegsValid) {

603 LiveRegs.init(*TRI);

604 LiveRegs.addLiveOuts(MBB);

605 LiveRegPos = MBB.end();

606 LiveRegsValid = true;

607 }

608

609 while (LiveRegPos != Before) {

610 --LiveRegPos;

611 LiveRegs.stepBackward(*LiveRegPos);

612 }

613}

614

616 unsigned Reg) {

617 for (const std::pair<unsigned, bool> &R : Regs)

618 if (R.first == Reg)

619 return true;

620 return false;

621}

622

623

624

625

626MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(

628 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,

630 ArrayRef<std::pair<unsigned, bool>> Regs,

632 unsigned NumRegs = Regs.size();

634

635

636

637 bool SafeToClobberCPSR = !isThumb1 ||

640

641 bool Writeback = isThumb1;

642

643

644

645

647 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");

648 if (Opcode == ARM::tLDRi)

649 Writeback = false;

650 else if (Opcode == ARM::tSTRi)

651 return nullptr;

652 }

653

655

657 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

658

659 if (Offset == 4 && haveIBAndDA) {

661 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {

663 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {

664

666 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {

667

668

670

671

672

673

674 if (NumRegs <= 2)

675 return nullptr;

676

677

678

679 if (!SafeToClobberCPSR)

680 return nullptr;

681

682 unsigned NewBase;

684

685

686 NewBase = Regs[NumRegs-1].first;

687 Writeback = false;

688 } else {

689

690 moveLiveRegsBefore(MBB, InsertBefore);

691

692

694 for (const std::pair<unsigned, bool> &R : Regs)

695 LiveRegs.addReg(R.first);

696

697 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);

698 if (NewBase == 0)

699 return nullptr;

700 }

701

702 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm

703 : ARM::t2ADDri)

704 : (isThumb1 && Base == ARM::SP)

705 ? ARM::tADDrSPi

706 : (isThumb1 && Offset < 8)

707 ? ARM::tADDi3

708 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;

709

711

712

714 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm

715 : ARM::t2SUBri)

716 : (isThumb1 && Offset < 8 && Base != ARM::SP)

717 ? ARM::tSUBi3

718 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;

719 }

720

721 if (!TL->isLegalAddImmediate(Offset))

722

723 return nullptr;

724

725

726

727 bool KillOldBase = BaseKill &&

729

730 if (isThumb1) {

731

732

733

734

735

736 if (Base != NewBase &&

737 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {

738

740 !STI->hasV6Ops()) {

741

743 return nullptr;

744 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)

746 } else

747 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)

750

751

752 Base = NewBase;

753 KillOldBase = true;

754 }

755 if (BaseOpc == ARM::tADDrSPi) {

756 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");

757 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)

761 } else

762 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)

767 } else {

768 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)

773 }

774 Base = NewBase;

775 BaseKill = true;

776 }

777

779

780

781

783 if (!Opcode)

784 return nullptr;

785

786

787

788

789

790

791

792

793

794 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)

795 return nullptr;

796

798

799 if (Writeback) {

800 assert(isThumb1 && "expected Writeback only inThumb1");

801 if (Opcode == ARM::tLDMIA) {

803

804 Opcode = ARM::tLDMIA_UPD;

805 }

806

808

809

812

813

814

815 if (!BaseKill)

816 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

817 } else {

818

821 }

822

824

825 for (const std::pair<unsigned, bool> &R : Regs)

827

829

831}

832

833MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(

835 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,

837 ArrayRef<std::pair<unsigned, bool>> Regs,

840 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");

841 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

842

843 assert(Regs.size() == 2);

845 TII->get(LoadStoreOpcode));

846 if (IsLoad) {

849 } else {

852 }

856}

857

858

859MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {

861 unsigned Opcode = First->getOpcode();

867

871 bool IsKill = MO.isKill();

872 if (IsKill)

873 KilledRegs.insert(Reg);

874 Regs.push_back(std::make_pair(Reg, IsKill));

875 UsedRegs.insert(Reg);

876

877 if (IsLoad) {

878

879

880

883 continue;

886

888 continue;

889

890 if (MI->readsRegister(DefReg, nullptr))

891 continue;

893 }

894 }

895 }

896

897

899

900 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];

901 iterator InsertBefore = std::next(iterator(LatestMI));

910 if (Cand.CanMergeToLSDouble)

911 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,

912 Opcode, Pred, PredReg, DL, Regs,

913 Cand.Instrs);

914 if (!Merged && Cand.CanMergeToLSMulti)

915 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,

916 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);

917 if (!Merged)

918 return nullptr;

919

920

921

922 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);

923 bool EarliestAtBegin = false;

924 if (EarliestI == MBB.begin()) {

925 EarliestAtBegin = true;

926 } else {

927 EarliestI = std::prev(EarliestI);

928 }

929

930

933

934

935 if (EarliestAtBegin)

937 else

938 EarliestI = std::next(EarliestI);

939 auto FixupRange = make_range(EarliestI, iterator(Merged));

940

942

943

945 for (unsigned &ImpDefReg : ImpDefs) {

947 if (!MO.isReg() || MO.getReg() != ImpDefReg)

948 continue;

951 else if (MO.isDef())

952 ImpDefReg = 0;

953 }

954 }

955 }

956

958 for (unsigned ImpDef : ImpDefs)

960 } else {

961

962 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);

966 continue;

969 }

970 }

971 assert(ImpDefs.empty());

972 }

973

974 return Merged;

975}

976

979

980

981 return (Value % 4) == 0 && Value < 1024;

982}

983

984

985

988

989

990 unsigned Opcode = MI.getOpcode();

992 return true;

993

994

995

998 return true;

999 return false;

1000}

1001

1002

/// Partition the queue \p MemOps into MergeCandidate objects and append them
/// to Candidates.
///
/// Each pass of the outer do/while starts a new group at index SIndex,
/// extends it over the following queue entries for as long as the checks in
/// the inner loop hold, records the group as one MergeCandidate and then
/// advances SIndex by the group size. A group of size one is still recorded,
/// but with both merge flags cleared.
///
/// The body indexes MemOps[SIndex] before the loop bound is tested, so
/// \p MemOps must not be empty.
///
/// NOTE(review): several lines of this function are missing from this listing
/// (the definitions of FirstMI, isNotVFP, Size, PMO/PReg and MO/Reg, the tail
/// of two conditions, and at least one statement near the end of the inner
/// loop). The comments below describe only what the visible statements do.
1003void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {

1005 unsigned Opcode = FirstMI->getOpcode();

1008

 // [SIndex, EIndex) is the part of the queue not yet assigned to a group.
1009 unsigned SIndex = 0;

1010 unsigned EIndex = MemOps.size();

1011 do {

1012

1014 int Offset = MemOps[SIndex].Offset;

 // An undef register operand gets the numeric_limits max() value instead
 // of a real encoding value.
1017 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits::max()

1018 : TRI->getEncodingValue(PReg);

 // Latest/Earliest track the group members with the smallest/largest
 // queue Position; Count is the current group size.
1019 unsigned Latest = SIndex;

1020 unsigned Earliest = SIndex;

1021 unsigned Count = 1;

1022 bool CanMergeToLSDouble =

1024

1025

1026 if (STI->isCortexM3() && isi32Load(Opcode) &&

1028 CanMergeToLSDouble = false;

1029

1030 bool CanMergeToLSMulti = true;

1031

1032

 // On subtargets reporting hasSlowOddRegister(), a VFP group (!isNotVFP)
 // whose first register number is odd is not merged into a multiple.
1033 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)

1034 CanMergeToLSMulti = false;

1035

1036

1037

 // A group whose first register is SP or PC is never merged.
1038 if (PReg == ARM::SP || PReg == ARM::PC)

1039 CanMergeToLSMulti = CanMergeToLSDouble = false;

1040

1041

1043 CanMergeToLSMulti = CanMergeToLSDouble = false;

1044

1045

 // Maximum group size: 16 for VLDRD/VSTRD, otherwise unlimited.
1046 unsigned Limit;

1047 switch (Opcode) {

1048 default:

1049 Limit = UINT_MAX;

1050 break;

1051 case ARM::VLDRD:

1052 case ARM::VSTRD:

1053 Limit = 16;

1054 break;

1055 }

1056

1057

 // Try to extend the group with the queue entries that follow SIndex.
1058 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {

1059 int NewOffset = MemOps[I].Offset;

 // The next entry must sit exactly Size bytes after Offset.
1060 if (NewOffset != Offset + (int)Size)

1061 break;

 // SP and PC never join a group, and the group may not exceed Limit.
1064 if (Reg == ARM::SP || Reg == ARM::PC)

1065 break;

1066 if (Count == Limit)

1067 break;

1068

1069

1070 unsigned RegNum = MO.isUndef() ? std::numeric_limits::max()

1071 : TRI->getEncodingValue(Reg);

1072 bool PartOfLSMulti = CanMergeToLSMulti;

1073 if (PartOfLSMulti) {

1074

 // For a multiple, register numbers must be strictly increasing, and
 // for VFP (!isNotVFP) they must additionally be consecutive.
1075 if (RegNum <= PRegNum)

1076 PartOfLSMulti = false;

1077

1078

1079

1080 else if (!isNotVFP && RegNum != PRegNum+1)

1081 PartOfLSMulti = false;

1082 }

1083

 // Only the second member of a group (Count <= 1 at this point) can
 // take part in a load/store-double.
1084 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

1085

 // Stop extending once the entry fits neither form; otherwise narrow
 // the group's flags to the forms this entry still allows.
1086 if (!PartOfLSMulti && !PartOfLSDouble)

1087 break;

1088 CanMergeToLSMulti &= PartOfLSMulti;

1089 CanMergeToLSDouble &= PartOfLSDouble;

1090

1091

 // Update which member has the smallest / largest queue Position.
1092 unsigned Position = MemOps[I].Position;

1093 if (Position < MemOps[Latest].Position)

1094 Latest = I;

1095 else if (Position > MemOps[Earliest].Position)

1096 Earliest = I;

1097

1099 PRegNum = RegNum;

1100 }

1101

1102

 // Build the candidate in storage obtained from Allocator and copy the
 // group's instructions into it in queue order.
1103 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;

1104 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)

1105 Candidate->Instrs.push_back(MemOps[C].MI);

 // Latest/Earliest are queue indices; store them relative to the group.
1106 Candidate->LatestMIIdx = Latest - SIndex;

1107 Candidate->EarliestMIIdx = Earliest - SIndex;

1108 Candidate->InsertPos = MemOps[Latest].Position;

 // A single instruction cannot be merged with anything.
1109 if (Count == 1)

1110 CanMergeToLSMulti = CanMergeToLSDouble = false;

1111 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;

1112 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;

1113 Candidates.push_back(Candidate);

1114

 // Continue with the first entry that was not taken into this group.
1115 SIndex += Count;

1116 } while (SIndex < EIndex);

1117}

1118

1121 switch (Opc) {

1123 case ARM::LDMIA:

1124 case ARM::LDMDA:

1125 case ARM::LDMDB:

1126 case ARM::LDMIB:

1127 switch (Mode) {

1129 case ARM_AM::ia: return ARM::LDMIA_UPD;

1130 case ARM_AM::ib: return ARM::LDMIB_UPD;

1131 case ARM_AM::da: return ARM::LDMDA_UPD;

1132 case ARM_AM::db: return ARM::LDMDB_UPD;

1133 }

1134 case ARM::STMIA:

1135 case ARM::STMDA:

1136 case ARM::STMDB:

1137 case ARM::STMIB:

1138 switch (Mode) {

1140 case ARM_AM::ia: return ARM::STMIA_UPD;

1141 case ARM_AM::ib: return ARM::STMIB_UPD;

1142 case ARM_AM::da: return ARM::STMDA_UPD;

1143 case ARM_AM::db: return ARM::STMDB_UPD;

1144 }

1145 case ARM::t2LDMIA:

1146 case ARM::t2LDMDB:

1147 switch (Mode) {

1149 case ARM_AM::ia: return ARM::t2LDMIA_UPD;

1150 case ARM_AM::db: return ARM::t2LDMDB_UPD;

1151 }

1152 case ARM::t2STMIA:

1153 case ARM::t2STMDB:

1154 switch (Mode) {

1156 case ARM_AM::ia: return ARM::t2STMIA_UPD;

1157 case ARM_AM::db: return ARM::t2STMDB_UPD;

1158 }

1159 case ARM::VLDMSIA:

1160 switch (Mode) {

1162 case ARM_AM::ia: return ARM::VLDMSIA_UPD;

1163 case ARM_AM::db: return ARM::VLDMSDB_UPD;

1164 }

1165 case ARM::VLDMDIA:

1166 switch (Mode) {

1168 case ARM_AM::ia: return ARM::VLDMDIA_UPD;

1169 case ARM_AM::db: return ARM::VLDMDDB_UPD;

1170 }

1171 case ARM::VSTMSIA:

1172 switch (Mode) {

1174 case ARM_AM::ia: return ARM::VSTMSIA_UPD;

1175 case ARM_AM::db: return ARM::VSTMSDB_UPD;

1176 }

1177 case ARM::VSTMDIA:

1178 switch (Mode) {

1180 case ARM_AM::ia: return ARM::VSTMDIA_UPD;

1181 case ARM_AM::db: return ARM::VSTMDDB_UPD;

1182 }

1183 }

1184}

1185

1186

1187

1188

1191 bool CheckCPSRDef;

1192 int Scale;

1193 switch (MI.getOpcode()) {

1194 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;

1195 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;

1196 case ARM::t2SUBri:

1197 case ARM::t2SUBspImm:

1198 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;

1199 case ARM::t2ADDri:

1200 case ARM::t2ADDspImm:

1201 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;

1202 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;

1203 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;

1204 default: return 0;

1205 }

1206

1208 if (MI.getOperand(0).getReg() != Reg ||

1209 MI.getOperand(1).getReg() != Reg ||

1211 MIPredReg != PredReg)

1212 return 0;

1213

1214 if (CheckCPSRDef && definesCPSR(MI))

1215 return 0;

1216 return MI.getOperand(2).getImm() * Scale;

1217}

1218

1219

1227 if (MBBI == BeginMBBI)

1228 return EndMBBI;

1229

1230

1232 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)

1233 --PrevMBBI;

1234

1236 return Offset == 0 ? EndMBBI : PrevMBBI;

1237}

1238

1239

1248 while (NextMBBI != EndMBBI) {

1249

1250 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())

1251 ++NextMBBI;

1252 if (NextMBBI == EndMBBI)

1253 return EndMBBI;

1254

1256 if (Off) {

1258 return NextMBBI;

1259 }

1260

1261

1262

1263

1264

1265

1266 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||

1267 NextMBBI->definesRegister(Reg, TRI))

1268 return EndMBBI;

1269

1270 ++NextMBBI;

1271 }

1272 return EndMBBI;

1273}

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286

1287bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {

1288

1289 if (isThumb1) return false;

1290 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

1291

1294 bool BaseKill = BaseOP.isKill();

1297 unsigned Opcode = MI->getOpcode();

1299

1300

1301

1304 return false;

1305

1317 } else {

1321

1322

1323

1324

1325

1326 if (!STI->hasMinSize() || !BaseKill)

1327 return false;

1328

1329 bool HighRegsUsed = false;

1331 if (MO.getReg() >= ARM::R8) {

1332 HighRegsUsed = true;

1333 break;

1334 }

1335

1336 if (!HighRegsUsed)

1337 MergeInstr = MBB.end();

1338 else

1339 return false;

1340 }

1341 }

1342 if (MergeInstr != MBB.end()) {

1343 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);

1345 }

1346

1352

1353

1355 MIB.add(MO);

1356

1357

1359

1360 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);

1362 return true;

1363}

1364

1367 switch (Opc) {

1368 case ARM::LDRi12:

1369 return ARM::LDR_PRE_IMM;

1370 case ARM::STRi12:

1371 return ARM::STR_PRE_IMM;

1372 case ARM::VLDRS:

1373 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;

1374 case ARM::VLDRD:

1375 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;

1376 case ARM::VSTRS:

1377 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;

1378 case ARM::VSTRD:

1379 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;

1380 case ARM::t2LDRi8:

1381 case ARM::t2LDRi12:

1382 return ARM::t2LDR_PRE;

1383 case ARM::t2STRi8:

1384 case ARM::t2STRi12:

1385 return ARM::t2STR_PRE;

1387 }

1388}

1389

1392 switch (Opc) {

1393 case ARM::LDRi12:

1394 return ARM::LDR_POST_IMM;

1395 case ARM::STRi12:

1396 return ARM::STR_POST_IMM;

1397 case ARM::VLDRS:

1398 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;

1399 case ARM::VLDRD:

1400 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;

1401 case ARM::VSTRS:

1402 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;

1403 case ARM::VSTRD:

1404 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;

1405 case ARM::t2LDRi8:

1406 case ARM::t2LDRi12:

1407 return ARM::t2LDR_POST;

1408 case ARM::t2LDRBi8:

1409 case ARM::t2LDRBi12:

1410 return ARM::t2LDRB_POST;

1411 case ARM::t2LDRSBi8:

1412 case ARM::t2LDRSBi12:

1413 return ARM::t2LDRSB_POST;

1414 case ARM::t2LDRHi8:

1415 case ARM::t2LDRHi12:

1416 return ARM::t2LDRH_POST;

1417 case ARM::t2LDRSHi8:

1418 case ARM::t2LDRSHi12:

1419 return ARM::t2LDRSH_POST;

1420 case ARM::t2STRi8:

1421 case ARM::t2STRi12:

1422 return ARM::t2STR_POST;

1423 case ARM::t2STRBi8:

1424 case ARM::t2STRBi12:

1425 return ARM::t2STRB_POST;

1426 case ARM::t2STRHi8:

1427 case ARM::t2STRHi12:

1428 return ARM::t2STRH_POST;

1429

1430 case ARM::MVE_VLDRBS16:

1431 return ARM::MVE_VLDRBS16_post;

1432 case ARM::MVE_VLDRBS32:

1433 return ARM::MVE_VLDRBS32_post;

1434 case ARM::MVE_VLDRBU16:

1435 return ARM::MVE_VLDRBU16_post;

1436 case ARM::MVE_VLDRBU32:

1437 return ARM::MVE_VLDRBU32_post;

1438 case ARM::MVE_VLDRHS32:

1439 return ARM::MVE_VLDRHS32_post;

1440 case ARM::MVE_VLDRHU32:

1441 return ARM::MVE_VLDRHU32_post;

1442 case ARM::MVE_VLDRBU8:

1443 return ARM::MVE_VLDRBU8_post;

1444 case ARM::MVE_VLDRHU16:

1445 return ARM::MVE_VLDRHU16_post;

1446 case ARM::MVE_VLDRWU32:

1447 return ARM::MVE_VLDRWU32_post;

1448 case ARM::MVE_VSTRB16:

1449 return ARM::MVE_VSTRB16_post;

1450 case ARM::MVE_VSTRB32:

1451 return ARM::MVE_VSTRB32_post;

1452 case ARM::MVE_VSTRH32:

1453 return ARM::MVE_VSTRH32_post;

1454 case ARM::MVE_VSTRBU8:

1455 return ARM::MVE_VSTRBU8_post;

1456 case ARM::MVE_VSTRHU16:

1457 return ARM::MVE_VSTRHU16_post;

1458 case ARM::MVE_VSTRWU32:

1459 return ARM::MVE_VSTRWU32_post;

1460

1462 }

1463}

1464

1465

1466

1467bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {

1468

1469

1470 if (isThumb1) return false;

1471 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

1472

1475 unsigned Opcode = MI->getOpcode();

1477 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||

1478 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);

1479 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);

1481 if (MI->getOperand(2).getImm() != 0)

1482 return false;

1484 return false;

1485

1486

1487

1488 if (MI->getOperand(0).getReg() == Base)

1489 return false;

1490

1499 unsigned NewOpc;

1500 if (!isAM5 && Offset == Bytes) {

1502 } else if (Offset == -Bytes) {

1504 } else {

1506 if (MergeInstr == MBB.end())

1507 return false;

1508

1510 if ((isAM5 && Offset != Bytes) ||

1514 return false;

1515 }

1516 }

1517 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);

1519

1521

1523 if (isAM5) {

1524

1525

1526

1527

1537 (void)MIB;

1538 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1539 } else if (isLd) {

1540 if (isAM2) {

1541

1542 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {

1543 auto MIB =

1551 (void)MIB;

1552 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1553 } else {

1555 auto MIB =

1563 (void)MIB;

1564 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1565 }

1566 } else {

1567

1568 auto MIB =

1575 (void)MIB;

1576 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1577 }

1578 } else {

1580

1581

1582

1583 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {

1585

1593 (void)MIB;

1594 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1595 } else {

1596

1603 (void)MIB;

1604 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);

1605 }

1606 }

1608

1609 return true;

1610}

1611

1612bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {

1613 unsigned Opcode = MI.getOpcode();

1614 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&

1615 "Must have t2STRDi8 or t2LDRDi8");

1616 if (MI.getOperand(3).getImm() != 0)

1617 return false;

1618 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);

1619

1620

1621

1627 return false;

1628

1636 unsigned NewOpc;

1638 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;

1639 } else {

1641 if (MergeInstr == MBB.end())

1642 return false;

1643 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;

1645 return false;

1646 }

1647 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);

1649

1652 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {

1654 } else {

1655 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);

1657 }

1660 assert(TII->get(Opcode).getNumOperands() == 6 &&

1661 TII->get(NewOpc).getNumOperands() == 7 &&

1662 "Unexpected number of operands in Opcode specification.");

1663

1664

1666 MIB.add(MO);

1668

1669 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);

1671 return true;

1672}

1673

1674

1675

1677 unsigned Opcode = MI.getOpcode();

1678 switch (Opcode) {

1679 case ARM::VLDRS:

1680 case ARM::VSTRS:

1681 case ARM::VLDRD:

1682 case ARM::VSTRD:

1683 case ARM::LDRi12:

1684 case ARM::STRi12:

1685 case ARM::tLDRi:

1686 case ARM::tSTRi:

1687 case ARM::tLDRspi:

1688 case ARM::tSTRspi:

1689 case ARM::t2LDRi8:

1690 case ARM::t2LDRi12:

1691 case ARM::t2STRi8:

1692 case ARM::t2STRi12:

1693 break;

1694 default:

1695 return false;

1696 }

1697 if (MI.getOperand(1).isReg())

1698 return false;

1699

1700

1701

1702 if (MI.hasOneMemOperand())

1703 return false;

1704

1706

1707

1708

1709

1711 return false;

1712

1713

1714

1716 return false;

1717

1718

1719

1720

1721 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())

1722 return false;

1723

1724

1725 if (MI.getOperand(1).isUndef())

1726 return false;

1727

1728 return true;

1729}

1730

1733 bool isDef, unsigned NewOpc, unsigned Reg,

1734 bool RegDeadKill, bool RegUndef, unsigned BaseReg,

1738 if (isDef) {

1740 TII->get(NewOpc))

1744

1745

1747 } else {

1749 TII->get(NewOpc))

1753

1754

1756 }

1757}

1758

1762 unsigned Opcode = MI->getOpcode();

1763

1764

1765 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)

1766 return false;

1767

1770 Register EvenReg = MI->getOperand(0).getReg();

1771 Register OddReg = MI->getOperand(1).getReg();

1772 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);

1773 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

1774

1775

1776

1777 bool Errata602117 = EvenReg == BaseReg &&

1778 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();

1779

1780 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&

1781 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

1782

1783 if (!Errata602117 && !NonConsecutiveRegs)

1784 return false;

1785

1786 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;

1787 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;

1788 bool EvenDeadKill = isLd ?

1789 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();

1790 bool EvenUndef = MI->getOperand(0).isUndef();

1791 bool OddDeadKill = isLd ?

1792 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();

1793 bool OddUndef = MI->getOperand(1).isUndef();

1794 bool BaseKill = BaseOp.isKill();

1795 bool BaseUndef = BaseOp.isUndef();

1796 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&

1797 "register offset not handled below");

1801

1802 if (OddRegNum > EvenRegNum && OffImm == 0) {

1803

1804

1805 unsigned NewOpc = (isLd)

1806 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)

1807 : (isT2 ? ARM::t2STMIA : ARM::STMIA);

1808 if (isLd) {

1815 ++NumLDRD2LDM;

1816 } else {

1825 ++NumSTRD2STM;

1826 }

1827 } else {

1828

1829 unsigned NewOpc = (isLd)

1830 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)

1831 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);

1832

1833

1834 unsigned NewOpc2 = (isLd)

1835 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)

1836 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);

1837

1838

1839 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {

1840 assert(TRI->regsOverlap(OddReg, BaseReg));

1842 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);

1844 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,

1845 MI);

1846 } else {

1847 if (OddReg == EvenReg && EvenDeadKill) {

1848

1849

1850

1851 EvenDeadKill = false;

1852 OddDeadKill = true;

1853 }

1854

1855 if (EvenReg == BaseReg)

1856 EvenDeadKill = false;

1858 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,

1859 MI);

1861 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,

1862 MI);

1863 }

1864 if (isLd)

1865 ++NumLDRD2LDR;

1866 else

1867 ++NumSTRD2STR;

1868 }

1869

1871 return true;

1872}

1873

1874

1875

1877 MemOpQueue MemOps;

1878 unsigned CurrBase = 0;

1879 unsigned CurrOpc = ~0u;

1881 unsigned Position = 0;

1882 assert(Candidates.size() == 0);

1883 assert(MergeBaseCandidates.size() == 0);

1884 LiveRegsValid = false;

1885

1888

1889 MBBI = std::prev(I);

1890 if (FixInvalidRegPairOp(MBB, MBBI))

1891 continue;

1892 ++Position;

1893

1895 unsigned Opcode = MBBI->getOpcode();

1902 if (CurrBase == 0) {

1903

1904 CurrBase = Base;

1905 CurrOpc = Opcode;

1906 CurrPred = Pred;

1907 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));

1908 continue;

1909 }

1910

1911 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {

1912

1913

1914

1915

1916

1917

1918

1919 bool Overlap = false;

1921 Overlap = (Base == Reg);

1922 if (!Overlap) {

1923 for (const MemOpQueueEntry &E : MemOps) {

1924 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {

1925 Overlap = true;

1926 break;

1927 }

1928 }

1929 }

1930 }

1931

1932 if (!Overlap) {

1933

1934 if (Offset > MemOps.back().Offset) {

1935 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));

1936 continue;

1937 } else {

1938 MemOpQueue::iterator MI, ME;

1939 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {

1940 if (Offset < MI->Offset) {

1941

1942 break;

1943 }

1945

1946 MI = ME;

1947 break;

1948 }

1949 }

1950 if (MI != MemOps.end()) {

1951 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));

1952 continue;

1953 }

1954 }

1955 }

1956 }

1957

1958

1960 --Position;

1961

1962 } else if (MBBI->isDebugInstr()) {

1963 continue;

1964 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||

1965 MBBI->getOpcode() == ARM::t2STRDi8) {

1966

1967

1968 MergeBaseCandidates.push_back(&*MBBI);

1969 }

1970

1971

1972 if (MemOps.size() > 0) {

1973 FormCandidates(MemOps);

1974

1975 CurrBase = 0;

1976 CurrOpc = ~0u;

1978 MemOps.clear();

1979 }

1980 }

1981 if (MemOps.size() > 0)

1982 FormCandidates(MemOps);

1983

1984

1985

1986 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {

1987 return M0->InsertPos < M1->InsertPos;

1988 };

1990

1991

1992 bool Changed = false;

1993 for (const MergeCandidate *Candidate : Candidates) {

1994 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {

1995 MachineInstr *Merged = MergeOpsUpdate(*Candidate);

1996

1997 if (Merged) {

1998 Changed = true;

1999 unsigned Opcode = Merged->getOpcode();

2000 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)

2001 MergeBaseUpdateLSDouble(*Merged);

2002 else

2003 MergeBaseUpdateLSMultiple(Merged);

2004 } else {

2006 if (MergeBaseUpdateLoadStore(MI))

2007 Changed = true;

2008 }

2009 }

2010 } else {

2011 assert(Candidate->Instrs.size() == 1);

2012 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))

2013 Changed = true;

2014 }

2015 }

2016 Candidates.clear();

2017

2019 MergeBaseUpdateLSDouble(*MI);

2020 MergeBaseCandidates.clear();

2021

2022 return Changed;

2023}

2024

2025

2026

2027

2028

2029

2030

2031

2032

2033

2034

2036

2037 if (isThumb1) return false;

2038 if (MBB.empty()) return false;

2039

2042 (MBBI->getOpcode() == ARM::BX_RET ||

2043 MBBI->getOpcode() == ARM::tBX_RET ||

2044 MBBI->getOpcode() == ARM::MOVPCLR)) {

2046

2047 while (PrevI->isDebugInstr() && PrevI != MBB.begin())

2048 --PrevI;

2050 unsigned Opcode = PrevMI.getOpcode();

2051 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||

2052 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||

2053 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {

2055 if (MO.getReg() != ARM::LR)

2056 return false;

2057 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);

2058 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||

2059 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");

2064 return true;

2065 }

2066 }

2067 return false;

2068}

2069

2073 MBBI->getOpcode() != ARM::tBX_RET)

2074 return false;

2075

2077 --Prev;

2078 if (Prev->getOpcode() != ARM::tMOVr ||

2079 !Prev->definesRegister(ARM::LR, nullptr))

2080 return false;

2081

2082 for (auto Use : Prev->uses())

2083 if (Use.isKill()) {

2084 assert(STI->hasV4TOps());

2091 return true;

2092 }

2093

2094 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");

2095}

2096

// Pass entry point for ARMLoadStoreOpt on machine function Fn. The visible
// part caches the function and the subtarget's instruction/register info,
// records whether the function is Thumb1 or Thumb2, and tries to merge returns
// into LDM via MergeReturnIntoLDM.
// NOTE(review): listing lines 2098, 2102-2104, 2113-2114, 2118, 2127 and
// 2129-2130 are absent from this listing, so the early-exit guard, the loop
// header over blocks and the final return cannot be seen here.
2097bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {

// Early exit; its guarding condition is on the missing line 2098.
2099 return false;

2100

2101 MF = &Fn;

2105 TII = STI->getInstrInfo();

2106 TRI = STI->getRegisterInfo();

2107

// Invalidate cached register-class information for the new function.
2108 RegClassInfoValid = false;

2109 isThumb2 = AFI->isThumb2Function();

// Thumb1 here means "a Thumb function that is not Thumb2".
2110 isThumb1 = AFI->isThumbFunction() && !isThumb2;

2111

2112 bool Modified = false, ModifiedLDMReturn = false;

// The return-into-LDM merge is attempted only when the subtarget has v5T ops
// and the function does not sign its return address.
2115 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())

2116 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);

// NOTE(review): the statement controlled by this `if` (line 2118) is missing.
2117 if (isThumb1)

2119 }

2120 Modified |= ModifiedLDMReturn;

2121

2122

2123

2124

2125

// NOTE(review): the action taken when a return was merged (line 2127) is missing.
2126 if (ModifiedLDMReturn)

2128

2131}

2132

2133#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \

2134 "ARM pre- register allocation load / store optimization pass"

2135

2136namespace {

2137

2138

2139

2141 static char ID;

2142

2151

2153

2155

2158 }

2159

2165 }

2166

2167 private:

2172 bool RescheduleOps(

2177 bool DistributeIncrements();

2179 };

2180

2181}

2182

2183char ARMPreAllocLoadStoreOpt::ID = 0;

2184

2190

2191

2192

2194 cl::init(8), cl::Hidden);

2195

// Pass entry point for ARMPreAllocLoadStoreOpt on machine function Fn. The
// visible part caches the data layout, subtarget instruction/register info,
// machine register info, dominator tree and alias-analysis results, then runs
// DistributeIncrements() followed by RescheduleLoadStoreInstrs().
// NOTE(review): listing lines 2197, 2201, 2210 and 2213 are absent from this
// listing, so the early-exit guard, the initialisation of STI, the declaration
// of MFI and the final return cannot be seen here.
2196bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {

// Early exit; its guarding condition is on the missing line 2197.
2198 return false;

2199

2200 TD = &Fn.getDataLayout();

2202 TII = STI->getInstrInfo();

2203 TRI = STI->getRegisterInfo();

2204 MRI = &Fn.getRegInfo();

// NOTE(review): the template argument of getAnalysis appears to have been
// stripped from this listing, here and for AA below.
2205 DT = &getAnalysis().getDomTree();

2206 MF = &Fn;

2207 AA = &getAnalysis().getAAResults();

2208

// Increment distribution runs first; its result seeds Modified.
2209 bool Modified = DistributeIncrements();

// NOTE(review): MFI is declared on the missing line 2210, presumably by a loop
// over the function's basic blocks; confirm.
2211 Modified |= RescheduleLoadStoreInstrs(&MFI);

2212

2214}

2215

2223

2225 while (++I != E) {

2226 if (I->isDebugInstr() || MemOps.count(&*I))

2227 continue;

2228 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())

2229 return false;

2230 if (I->mayStore() || (!isLd && I->mayLoad()))

2232 if (I->mayAlias(AA, *MemOp, false))

2233 return false;

2234 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {

2236 if (!MO.isReg())

2237 continue;

2239 if (MO.isDef() && TRI->regsOverlap(Reg, Base))

2240 return false;

2241 if (Reg != Base && !MemRegs.count(Reg))

2242 AddedRegPressure.insert(Reg);

2243 }

2244 }

2245

2246

2247 if (MemRegs.size() <= 4)

2248

2249 return true;

2250 return AddedRegPressure.size() <= MemRegs.size() * 2;

2251}

2252

2253bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(

2257

2258 if (!STI->hasV5TEOps())

2259 return false;

2260

2261

2262 unsigned Scale = 1;

2263 unsigned Opcode = Op0->getOpcode();

2264 if (Opcode == ARM::LDRi12) {

2265 NewOpc = ARM::LDRD;

2266 } else if (Opcode == ARM::STRi12) {

2267 NewOpc = ARM::STRD;

2268 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {

2269 NewOpc = ARM::t2LDRDi8;

2270 Scale = 4;

2271 isT2 = true;

2272 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {

2273 NewOpc = ARM::t2STRDi8;

2274 Scale = 4;

2275 isT2 = true;

2276 } else {

2277 return false;

2278 }

2279

2280

2281

2282

2286 return false;

2287

2289 Align ReqAlign = STI->getDualLoadStoreAlignment();

2290 if (Alignment < ReqAlign)

2291 return false;

2292

2293

2295 if (isT2) {

2296 int Limit = (1 << 8) * Scale;

2297 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))

2298 return false;

2300 } else {

2302 if (OffImm < 0) {

2304 OffImm = - OffImm;

2305 }

2306 int Limit = (1 << 8) * Scale;

2307 if (OffImm >= Limit || (OffImm & (Scale-1)))

2308 return false;

2310 }

2313 if (FirstReg == SecondReg)

2314 return false;

2318 return true;

2319}

2320

2321bool ARMPreAllocLoadStoreOpt::RescheduleOps(

2325 bool RetVal = false;

2326

2327

2331 assert(LHS == RHS || LOffset != ROffset);

2332 return LOffset > ROffset;

2333 });

2334

2335

2336

2337

2338

2339 while (Ops.size() > 1) {

2340 unsigned FirstLoc = ~0U;

2341 unsigned LastLoc = 0;

2344 int LastOffset = 0;

2345 unsigned LastOpcode = 0;

2346 unsigned LastBytes = 0;

2347 unsigned NumMove = 0;

2349

2350 unsigned LSMOpcode

2352 if (LastOpcode && LSMOpcode != LastOpcode)

2353 break;

2354

2355

2358 if (LastBytes) {

2359 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))

2360 break;

2361 }

2362

2363

2365 break;

2366

2367

2368 ++NumMove;

2369 LastOffset = Offset;

2370 LastBytes = Bytes;

2371 LastOpcode = LSMOpcode;

2372

2373 unsigned Loc = MI2LocMap[Op];

2374 if (Loc <= FirstLoc) {

2375 FirstLoc = Loc;

2376 FirstOp = Op;

2377 }

2378 if (Loc >= LastLoc) {

2379 LastLoc = Loc;

2380 LastOp = Op;

2381 }

2382 }

2383

2384 if (NumMove <= 1)

2386 else {

2389 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {

2390 MemOps.insert(Ops[i]);

2391 MemRegs.insert(Ops[i]->getOperand(0).getReg());

2392 }

2393

2394

2395

2396 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;

2397 if (DoMove)

2399 MemOps, MemRegs, TRI, AA);

2400 if (!DoMove) {

2401 for (unsigned i = 0; i != NumMove; ++i)

2403 } else {

2404

2406 while (InsertPos != MBB->end() &&

2407 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))

2408 ++InsertPos;

2409

2410

2411

2414 Register FirstReg, SecondReg;

2417 bool isT2 = false;

2418 unsigned NewOpc = 0;

2421 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,

2422 FirstReg, SecondReg, BaseReg,

2423 Offset, PredReg, Pred, isT2)) {

2426

2429 MRI->constrainRegClass(FirstReg, TRC);

2430 MRI->constrainRegClass(SecondReg, TRC);

2431

2432

2433 if (isLd) {

2438

2439

2440

2441 if (!isT2)

2446 ++NumLDRDFormed;

2447 } else {

2452

2453

2454

2455 if (!isT2)

2460 ++NumSTRDFormed;

2461 }

2464

2465 if (!isT2) {

2466

2469 }

2470 } else {

2471 for (unsigned i = 0; i != NumMove; ++i) {

2473 if (isLd) {

2474

2476 RegisterMap[Reg];

2477 }

2478

2480 }

2481 }

2482

2483 NumLdStMoved += NumMove;

2484 RetVal = true;

2485 }

2486 }

2487 }

2488

2489 return RetVal;

2490}

2491

// NOTE(review): the signature line for this body is missing from the listing;
// presumably it is forEachDbgRegOperand(MachineInstr *MI,
// std::function<void(MachineOperand &)> Fn); confirm.
// Invokes Fn on each register operand of MI that is examined below; operands
// that are not registers are skipped.
2494 if (MI->isNonListDebugValue()) {

// Non-list debug value: only operand 0 is examined.
2495 auto &Op = MI->getOperand(0);

2496 if (Op.isReg())

2497 Fn(Op);

2498 } else {

// Any other form: every operand from index 2 onwards is examined.
// NOTE(review): presumably operands 0 and 1 are the variable and expression of
// a DBG_VALUE_LIST; confirm.
2499 for (unsigned I = 2; I < MI->getNumOperands(); I++) {

2500 auto &Op = MI->getOperand(I);

2501 if (Op.isReg())

2502 Fn(Op);

2503 }

2504 }

2505}

2506

2507

2508

2512

2514 auto RegIt = RegisterMap.find(Op.getReg());

2515 if (RegIt == RegisterMap.end())

2516 return;

2517 auto &InstrVec = RegIt->getSecond();

2518 llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);

2519 });

2520}

2521

// NOTE(review): the signature line for this body is missing from the listing;
// presumably it is createDebugVariableFromMachineInstr(MachineInstr *MI); confirm.
// Builds a DebugVariable from MI's debug variable, its debug expression and
// the inlined-at location of its debug location, and returns it.
2523 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),

2524 MI->getDebugLoc()->getInlinedAt());

2525 return DbgVar;

2526}

2527

2528bool

2530 bool RetVal = false;

2531

2535 Base2InstMap Base2LdsMap;

2536 Base2InstMap Base2StsMap;

2537 BaseVec LdBases;

2538 BaseVec StBases;

2539

2540

2541

2543

2544 unsigned Loc = 0;

2547 while (MBBI != E) {

2550 if (MI.isCall() || MI.isTerminator()) {

2551

2553 break;

2554 }

2555

2556 if (MI.isDebugInstr())

2557 MI2LocMap[&MI] = ++Loc;

2558

2560 continue;

2563 continue;

2564

2565 int Opc = MI.getOpcode();

2569 bool StopHere = false;

2570 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {

2571 auto [BI, Inserted] = Base2Ops.try_emplace(Base);

2572 if (Inserted) {

2573 BI->second.push_back(&MI);

2574 Bases.push_back(Base);

2575 return;

2576 }

2579 StopHere = true;

2580 break;

2581 }

2582 }

2583 if (!StopHere)

2584 BI->second.push_back(&MI);

2585 };

2586

2587 if (isLd)

2588 FindBases(Base2LdsMap, LdBases);

2589 else

2590 FindBases(Base2StsMap, StBases);

2591

2592 if (StopHere) {

2593

2594

2595 --Loc;

2596 break;

2597 }

2598 }

2599

2600

2601 for (unsigned Base : LdBases) {

2603 if (Lds.size() > 1)

2604 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);

2605 }

2606

2607

2608 for (unsigned Base : StBases) {

2610 if (Sts.size() > 1)

2611 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);

2612 }

2613

2614 if (MBBI != E) {

2615 Base2LdsMap.clear();

2616 Base2StsMap.clear();

2617 LdBases.clear();

2618 StBases.clear();

2619 }

2620 }

2621

2622

2623

2624

2625

2626

2627

2628

2629

2630

2631

2632

2633

2634

2635

2636

2637

2638

2639

2640

2641

2642

2643

2644

2645

2646

2647

2648

2649

2650

2651

2652

2653

2654

2655

2656

2657

2658

2659

2660

2661

2662

2663

2664

2665

2666

2667

2668

2669

2670

2671

2672

2673

2674

2675

2676

2677

2678

2679

2680

2681

2682

2683

2684

2685

2686

2687

2688

2689

2690

2691

2692

2693

2694

2695

2696

2697

2698

2699

2700

2701

2702

2703

2704

2705

2706

2707

2708

2709

2710

2711

2712

2713

2714

2715

2716

2717

2718

2719

2720

2721

2722

2723

2724

2725

2726

2727

2728

2729

2730

2731

2732

2733

2734

2735

2736

2737

2738

2739

2740

2741

2742

2743

2744

2745

2746

2747

2748

2749

2750

2751

2752

2753

2754

2755

2756

2757

2758

2759

2760

2761

2762

2763

2764

2765

2766

2767

2768

2769

2770

2771

2773

2774

2778

2779 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {

2780 auto RegIt = RegisterMap.find(Reg);

2781 if (RegIt == RegisterMap.end())

2782 return;

2783 auto &InstrVec = RegIt->getSecond();

2784 InstrVec.push_back(&MI);

2785 InstrMap[&MI].push_back(Reg);

2786 };

2787

2788 if (MI.isDebugValue()) {

2789 assert(MI.getDebugVariable() &&

2790 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");

2791

2793

2794

2795

2796

2798 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());

2799 });

2800

2801

2802

2803

2804

2805

2806 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);

2807 if (InstrIt != DbgValueSinkCandidates.end()) {

2808 auto *Instr = InstrIt->getSecond();

2809 auto RegIt = InstrMap.find(Instr);

2810 if (RegIt != InstrMap.end()) {

2811 const auto &RegVec = RegIt->getSecond();

2812

2813

2814 for (auto &Reg : RegVec) {

2815 auto RegIt = RegisterMap.find(Reg);

2816 if (RegIt == RegisterMap.end())

2817 continue;

2818 auto &InstrVec = RegIt->getSecond();

2821 return Var == DbgVar;

2822 };

2823

2825 }

2828 }

2829 }

2830 DbgValueSinkCandidates[DbgVar] = &MI;

2831 } else {

2832

2833

2834 auto Opc = MI.getOpcode();

2836 continue;

2837 auto Reg = MI.getOperand(0).getReg();

2838 auto RegIt = RegisterMap.find(Reg);

2839 if (RegIt == RegisterMap.end())

2840 continue;

2841 auto &DbgInstrVec = RegIt->getSecond();

2842 if (!DbgInstrVec.size())

2843 continue;

2844 for (auto *DbgInstr : DbgInstrVec) {

2846 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);

2847 MBB->insert(InsertPos, ClonedMI);

2849

2850

2852 auto DbgIt = DbgValueSinkCandidates.find(DbgVar);

2853

2854

2855

2856 if (DbgIt != DbgValueSinkCandidates.end())

2857 DbgValueSinkCandidates.erase(DbgIt);

2858

2861

2862

2863 if (DbgInstr->isDebugValueList())

2865 DbgInstr);

2866 }

2867 }

2868 }

2869 return RetVal;

2870}

2871

2872

2873

2874

2875

// NOTE(review): the signature line for this body is missing from the listing;
// presumably it is getBaseOperandIndex(MachineInstr &MI); confirm.
// Classifies MI by opcode and returns an int:
//    1  for the MVE_VLDR*/MVE_VSTR* opcodes without a _post/_pre suffix and
//       for the t2LDR{H,SH,B,SB}/t2STR{B,H} i8 and i12 opcodes,
//    2  for the MVE_VLDR*/MVE_VSTR* opcodes with a _post or _pre suffix,
//   -1  for every other opcode.
2877 switch (MI.getOpcode()) {

2878 case ARM::MVE_VLDRBS16:

2879 case ARM::MVE_VLDRBS32:

2880 case ARM::MVE_VLDRBU16:

2881 case ARM::MVE_VLDRBU32:

2882 case ARM::MVE_VLDRHS32:

2883 case ARM::MVE_VLDRHU32:

2884 case ARM::MVE_VLDRBU8:

2885 case ARM::MVE_VLDRHU16:

2886 case ARM::MVE_VLDRWU32:

2887 case ARM::MVE_VSTRB16:

2888 case ARM::MVE_VSTRB32:

2889 case ARM::MVE_VSTRH32:

2890 case ARM::MVE_VSTRBU8:

2891 case ARM::MVE_VSTRHU16:

2892 case ARM::MVE_VSTRWU32:

2893 case ARM::t2LDRHi8:

2894 case ARM::t2LDRHi12:

2895 case ARM::t2LDRSHi8:

2896 case ARM::t2LDRSHi12:

2897 case ARM::t2LDRBi8:

2898 case ARM::t2LDRBi12:

2899 case ARM::t2LDRSBi8:

2900 case ARM::t2LDRSBi12:

2901 case ARM::t2STRBi8:

2902 case ARM::t2STRBi12:

2903 case ARM::t2STRHi8:

2904 case ARM::t2STRHi12:

// All opcodes above share the result 1.
2905 return 1;

2906 case ARM::MVE_VLDRBS16_post:

2907 case ARM::MVE_VLDRBS32_post:

2908 case ARM::MVE_VLDRBU16_post:

2909 case ARM::MVE_VLDRBU32_post:

2910 case ARM::MVE_VLDRHS32_post:

2911 case ARM::MVE_VLDRHU32_post:

2912 case ARM::MVE_VLDRBU8_post:

2913 case ARM::MVE_VLDRHU16_post:

2914 case ARM::MVE_VLDRWU32_post:

2915 case ARM::MVE_VSTRB16_post:

2916 case ARM::MVE_VSTRB32_post:

2917 case ARM::MVE_VSTRH32_post:

2918 case ARM::MVE_VSTRBU8_post:

2919 case ARM::MVE_VSTRHU16_post:

2920 case ARM::MVE_VSTRWU32_post:

2921 case ARM::MVE_VLDRBS16_pre:

2922 case ARM::MVE_VLDRBS32_pre:

2923 case ARM::MVE_VLDRBU16_pre:

2924 case ARM::MVE_VLDRBU32_pre:

2925 case ARM::MVE_VLDRHS32_pre:

2926 case ARM::MVE_VLDRHU32_pre:

2927 case ARM::MVE_VLDRBU8_pre:

2928 case ARM::MVE_VLDRHU16_pre:

2929 case ARM::MVE_VLDRWU32_pre:

2930 case ARM::MVE_VSTRB16_pre:

2931 case ARM::MVE_VSTRB32_pre:

2932 case ARM::MVE_VSTRH32_pre:

2933 case ARM::MVE_VSTRBU8_pre:

2934 case ARM::MVE_VSTRHU16_pre:

2935 case ARM::MVE_VSTRWU32_pre:

// The _post and _pre opcodes share the result 2.
2936 return 2;

2937 }

// Any opcode not listed in the switch.
2938 return -1;

2939}

2940

// NOTE(review): the signature line for this body is missing from the listing;
// presumably it is isPostIndex(MachineInstr &MI); confirm.
// Returns true when MI's opcode is one of the MVE_VLDR*/MVE_VSTR* "_post"
// opcodes listed below, and false for every other opcode.
2942 switch (MI.getOpcode()) {

2943 case ARM::MVE_VLDRBS16_post:

2944 case ARM::MVE_VLDRBS32_post:

2945 case ARM::MVE_VLDRBU16_post:

2946 case ARM::MVE_VLDRBU32_post:

2947 case ARM::MVE_VLDRHS32_post:

2948 case ARM::MVE_VLDRHU32_post:

2949 case ARM::MVE_VLDRBU8_post:

2950 case ARM::MVE_VLDRHU16_post:

2951 case ARM::MVE_VLDRWU32_post:

2952 case ARM::MVE_VSTRB16_post:

2953 case ARM::MVE_VSTRB32_post:

2954 case ARM::MVE_VSTRH32_post:

2955 case ARM::MVE_VSTRBU8_post:

2956 case ARM::MVE_VSTRHU16_post:

2957 case ARM::MVE_VSTRWU32_post:

2958 return true;

2959 }

// Opcode not in the list above.
2960 return false;

2961}

2962

// NOTE(review): the signature line for this body is missing from the listing;
// presumably it is isPreIndex(MachineInstr &MI); confirm.
// Returns true when MI's opcode is one of the MVE_VLDR*/MVE_VSTR* "_pre"
// opcodes listed below, and false for every other opcode.
2964 switch (MI.getOpcode()) {

2965 case ARM::MVE_VLDRBS16_pre:

2966 case ARM::MVE_VLDRBS32_pre:

2967 case ARM::MVE_VLDRBU16_pre:

2968 case ARM::MVE_VLDRBU32_pre:

2969 case ARM::MVE_VLDRHS32_pre:

2970 case ARM::MVE_VLDRHU32_pre:

2971 case ARM::MVE_VLDRBU8_pre:

2972 case ARM::MVE_VLDRHU16_pre:

2973 case ARM::MVE_VLDRWU32_pre:

2974 case ARM::MVE_VSTRB16_pre:

2975 case ARM::MVE_VSTRB32_pre:

2976 case ARM::MVE_VSTRH32_pre:

2977 case ARM::MVE_VSTRBU8_pre:

2978 case ARM::MVE_VSTRHU16_pre:

2979 case ARM::MVE_VSTRWU32_pre:

2980 return true;

2981 }

// Opcode not in the list above.
2982 return false;

2983}

2984

2985

2986

2987

2988

2989

2992 int &CodesizeEstimate) {

2994 return true;

2995

2996

3001 CodesizeEstimate += 1;

3002 return Imm < 0 && -Imm < ((1 << 8) * 1);

3003 }

3004 return false;

3005}

3006

3007

3008

3009

3013

3015 MI->getOperand(BaseOp).setReg(NewBaseReg);

3016

3021 MRI.constrainRegClass(NewBaseReg, TRC);

3022

3023 int OldOffset = MI->getOperand(BaseOp + 1).getImm();

3025 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);

3026 else {

3027 unsigned ConvOpcode;

3028 switch (MI->getOpcode()) {

3029 case ARM::t2LDRHi12:

3030 ConvOpcode = ARM::t2LDRHi8;

3031 break;

3032 case ARM::t2LDRSHi12:

3033 ConvOpcode = ARM::t2LDRSHi8;

3034 break;

3035 case ARM::t2LDRBi12:

3036 ConvOpcode = ARM::t2LDRBi8;

3037 break;

3038 case ARM::t2LDRSBi12:

3039 ConvOpcode = ARM::t2LDRSBi8;

3040 break;

3041 case ARM::t2STRHi12:

3042 ConvOpcode = ARM::t2STRHi8;

3043 break;

3044 case ARM::t2STRBi12:

3045 ConvOpcode = ARM::t2STRBi8;

3046 break;

3047 default:

3049 }

3051 "Illegal Address Immediate after convert!");

3052

3054 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)

3055 .add(MI->getOperand(0))

3056 .add(MI->getOperand(1))

3058 .add(MI->getOperand(3))

3059 .add(MI->getOperand(4))

3061 MI->eraseFromParent();

3062 }

3063}

3064

3071

3074

3076

3078 MRI.constrainRegClass(NewReg, TRC);

3079

3080 TRC = TII->getRegClass(MCID, 2, TRI, *MF);

3081 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);

3082

3088

3089 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)

3091 .add(MI->getOperand(0))

3092 .add(MI->getOperand(1))

3094 .add(MI->getOperand(3))

3095 .add(MI->getOperand(4))

3096 .add(MI->getOperand(5))

3099 if (MI->mayLoad()) {

3100 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)

3101 .add(MI->getOperand(0))

3103 .add(MI->getOperand(1))

3105 .add(MI->getOperand(3))

3106 .add(MI->getOperand(4))

3108 } else {

3109 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)

3111 .add(MI->getOperand(0))

3112 .add(MI->getOperand(1))

3114 .add(MI->getOperand(3))

3115 .add(MI->getOperand(4))

3117 }

3118 default:

3120 }

3121}

3122

3123

3124

3125

3126

3127

3128

3129

3130

3131

3132

3133

3134

3135

3136

3137

3138

3139bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {

3140

3141

3144

3146

3147

3149 for (auto &Use : MRI->use_nodbg_instructions(Base)) {

3152 continue;

3153 }

3154

3156 if (BaseOp == -1)

3157 return false;

3158

3159 if (Use.getOperand(BaseOp).isReg() ||

3160 Use.getOperand(BaseOp).getReg() != Base)

3161 return false;

3163 PrePostInc = &Use;

3164 else if (Use.getOperand(BaseOp + 1).getImm() == 0)

3165 BaseAccess = &Use;

3166 else

3168 }

3169

3170 int IncrementOffset;

3172 if (BaseAccess && Increment) {

3173 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())

3174 return false;

3176 if (Increment->definesRegister(ARM::CPSR, nullptr) ||

3178 return false;

3179

3180 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "

3181 << Base.virtRegIndex() << "\n");

3182

3183

3184

3186 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {

3187 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&

3189 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");

3190 return false;

3191 }

3192 }

3193

3194

3199 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");

3200 return false;

3201 }

3202 }

3203 else if (PrePostInc) {

3204

3205

3206

3207

3208 if (Increment)

3209 return false;

3210

3211 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "

3212 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");

3214 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();

3215 BaseAccess = PrePostInc;

3217 }

3218 else

3219 return false;

3220

3221

3222

3223

3224

3225

3226

3227

3228

3230 int CodesizeEstimate = -1;

3231 for (auto *Use : OtherAccesses) {

3236 Use->getOperand(BaseOp + 1).getImm() -

3237 IncrementOffset,

3238 TII, CodesizeEstimate)) {

3239 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");

3240 return false;

3241 }

3242 } else if (!DT->dominates(Use, BaseAccess)) {

3244 dbgs() << " Unknown dominance relation between Base and Use\n");

3245 return false;

3246 }

3247 }

3248 if (STI->hasMinSize() && CodesizeEstimate > 0) {

3249 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");

3250 return false;

3251 }

3252

3253 if (!PrePostInc) {

3254

3257 NewBaseReg = Increment->getOperand(0).getReg();

3262 (void)BaseAccessPost;

3264 }

3265

3266 for (auto *Use : SuccessorAccesses) {

3270 }

3271

3272

3273

3275 Op.setIsKill(false);

3276 return true;

3277}

3278

3279bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {

3280 bool Changed = false;

3282 for (auto &MBB : *MF) {

3283 for (auto &MI : MBB) {

3285 if (BaseOp == -1 || MI.getOperand(BaseOp).isReg())

3286 continue;

3287

3289 if (Base.isVirtual())

3290 continue;

3291

3293 }

3294 }

3295

3296 for (auto Base : Visited)

3297 Changed |= DistributeIncrements(Base);

3298

3299 return Changed;

3300}

3301

3302

// NOTE(review): the signature line declaring PreAlloc is missing from the
// listing; presumably this is the factory function for the load/store
// optimization passes. Confirm.
// Returns a newly allocated ARMPreAllocLoadStoreOpt when PreAlloc is true,
// otherwise a newly allocated ARMLoadStoreOpt.
3304 if (PreAlloc)

3305 return new ARMPreAllocLoadStoreOpt();

3306 return new ARMLoadStoreOpt();

3307}

unsigned const MachineRegisterInfo * MRI

static bool isLoadSingle(unsigned Opc)

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)

static bool isPreIndex(MachineInstr &MI)

static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)

static bool isPostIndex(MachineInstr &MI)

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)

static bool isMemoryOp(const MachineInstr &MI)

Returns true if instruction is a memory operation that this pass is capable of operating on.

static unsigned getLSMultipleTransferSize(const MachineInstr *MI)

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)

static bool isT1i32Load(unsigned Opc)

static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool > > &Regs, unsigned Reg)

static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)

static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)

static bool isi32Store(unsigned Opc)

static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)

Searches for an increment or decrement of Reg after MBBI.

static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)

Searches for an increment or decrement of Reg before MBBI.

static int getMemoryOpOffset(const MachineInstr &MI)

static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)

static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)

arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)

static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)

static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)

Check if the given instruction increments or decrements a register and return the amount it is increm...

static bool isT2i32Store(unsigned Opc)

static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)

static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)

Return true for loads/stores that can be combined to a double/multi operation without increasing the ...

static int getBaseOperandIndex(MachineInstr &MI)

static bool isT2i32Load(unsigned Opc)

static bool isi32Load(unsigned Opc)

static unsigned getImmScale(unsigned Opc)

static bool isT1i32Store(unsigned Opc)

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME

#define ARM_LOAD_STORE_OPT_NAME

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)

static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)

static bool isValidLSDoubleOffset(int Offset)

static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)

static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))

This switch disables formation of double/multi instructions that could potentially lead to (new) alig...

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

MachineBasicBlock MachineBasicBlock::iterator MBBI

This file defines the BumpPtrAllocator interface.

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

const HexagonInstrInfo * TII

static MaybeAlign getAlign(Value *Ptr)

unsigned const TargetRegisterInfo * TRI

static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This file describes how to lower LLVM code to machine code.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

static void updateLRRestored(MachineFunction &MF)

Update the IsRestored flag on LR if it is spilled, based on the return instructions.

ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...

const ARMTargetLowering * getTargetLowering() const override

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

This class represents an Operation in the Expression.

A parsed version of the target data layout string, and methods for querying it.

Identifies a unique instance of a variable.

iterator find(const_arg_type_t< KeyT > Val)

bool erase(const KeyT &Val)

Implements a dense probed hash-table based set.

bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

FunctionPass class - This class is used to implement most global optimizations.

A set of register units used to track register liveness.

Describe properties that are true of each instruction in the target description file.

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const

Return whether (physical) register Reg has been defined and not killed as of just before Before.

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

iterator getLastNonDebugInstr(bool SkipPseudoOp=true)

Returns an iterator to the last non-debug instruction in the basic block, or end().

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

MachineInstrBundleIterator< MachineInstr > iterator

@ LQR_Dead

Register is known to be fully dead.

Analysis pass which computes a MachineDominatorTree.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

virtual MachineFunctionProperties getRequiredProperties() const

Properties which a MachineFunction may have at a given point in time.

MachineFunctionProperties & set(Property P)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const

const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const

const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const

Copy all the implicit operands from OtherMI onto this one.

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

unsigned getNumOperands() const

Returns the total number of operands.

void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)

Copy implicit register operands from specified instruction to this instruction.

bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const

Return true if the MachineInstr kills the specified register.

void setDesc(const MCInstrDesc &TID)

Replace the instruction descriptor (thus opcode) of the current instruction with a new one.

bool hasOneMemOperand() const

Return true if this instruction has exactly one MachineMemOperand.

mmo_iterator memoperands_begin() const

Access to memory operands of the instruction.

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

bool isAtomic() const

Returns true if this operation has an atomic ordering requirement of unordered or higher,...

Align getAlign() const

Return the minimum known alignment in bytes of the actual memory reference.

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

bool readsReg() const

readsReg - Returns true if this operand reads the previous value of its register.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setReg(Register Reg)

Change the register this operand corresponds to.

void setIsKill(bool Val=true)

void setIsUndef(bool Val=true)

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

Wrapper class representing virtual and physical registers.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

A BumpPtrAllocator that allows only elements of a specific type to be allocated.

StringRef - Represent a constant reference to a string, i.e.

Align getTransientStackAlign() const

getTransientStackAlign - This method returns the number of bytes to which the stack pointer must ...

TargetInstrInfo - Interface to description of machine instruction set.

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

TargetSubtargetInfo - Generic base class for all target subtargets.

virtual const TargetFrameLowering * getFrameLowering() const

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

void dump() const

Support for debugging, callable in GDB: V->dump()

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned char getAM3Offset(unsigned AM3Opc)

unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)

AddrOpc getAM5Op(unsigned AM5Opc)

unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)

getAM3Opc - This function encodes the addrmode3 opc field.

unsigned char getAM5Offset(unsigned AM5Opc)

AddrOpc getAM3Op(unsigned AM3Opc)

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ Define

Register definition.

@ Kill

The last use of a register.

@ CE

Windows NT (Windows on ARM)

Reg

All possible values of the reg field in the ModR/M byte.

initializer< Ty > init(const Ty &Val)

NodeAddr< InstrNode * > Instr

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

static bool isARMLowRegister(MCRegister Reg)

isARMLowRegister - Returns true if the register is a low register (r0-r7).

APFloat abs(APFloat X)

Returns the absolute value of the argument.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)

unsigned getDeadRegState(bool B)

static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)

Get the operands corresponding to the given Pred value.

FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)

Returns an instance of the load / store optimization pass.

unsigned M1(unsigned Val)

auto reverse(ContainerTy &&C)

void sort(IteratorTy Start, IteratorTy End)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

unsigned getUndefRegState(bool B)

unsigned getDefRegState(bool B)

void replace(R &&Range, const T &OldValue, const T &NewValue)

Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.

unsigned getKillRegState(bool B)

ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)

getInstrPredicate - If instruction is predicated, returns its predicate condition,...

DWARFExpression::Operation Op

unsigned M0(unsigned Val)

static MachineOperand t1CondCodeOp(bool isDead=false)

Get the operand corresponding to the conditional code result for Thumb1.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

static MachineOperand condCodeOp(unsigned CCReg=0)

Get the operand corresponding to the conditional code result.

int getAddSubImmediate(MachineInstr &MI)

This struct is a compact representation of a valid (non-zero power of two) alignment.

Description of the encoding of one expression Op.