LLVM: lib/Target/ARM/ARMLoadStoreOptimizer.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
63#include
64#include
65#include
66#include
67#include
68#include
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
// Statistic counters for this pass; each description string states what the
// counter tracks. Several of them are incremented further down in this file
// (e.g. NumLDMGened / NumSTMGened / NumVLDMGened / NumVSTMGened when a
// load/store-multiple opcode is selected, NumLDRD2LDM / NumSTRD2STM /
// NumLDRD2LDR / NumSTRD2STR when an ldrd/strd is rewritten).
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");
75STATISTIC(NumSTMGened , "Number of stm instructions generated");
76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
86
87
88
89
90
93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
94
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
// File-local declarations of the load/store optimization pass.
// NOTE(review): the class header and a number of member declarations are
// missing from this listing; the comments below cover only what is visible.
97namespace {
98
99
100
  // Pass identification member; defined after this namespace as
  // ARMLoadStoreOpt::ID = 0.
102 static char ID;
103
  // True once LiveRegs has been initialised (set after LiveRegs.init /
  // addLiveOuts further down); cleared again with `LiveRegsValid = false`
  // before memory operations are collected.
113 bool LiveRegsValid;
  // True once RegClassInfo.runOnMachineFunction has been run; findFreeReg
  // performs that computation lazily and then sets this flag.
114 bool RegClassInfoValid;
  // Instruction-set flags consulted throughout the pass, e.g. when choosing
  // opcodes or bailing out of Thumb1-incompatible transformations.
115 bool isThumb1, isThumb2;
116
118
120
123 MachineFunctionProperties::Property::NoVRegs);
124 }
125
127
128 private:
129
130
  // One queued memory operation: the instruction (member MI, initialised from
  // the constructor argument), its Offset, and its Position.
  // FormCandidates treats the entry with the smallest Position as the latest
  // instruction and the one with the largest Position as the earliest.
131 struct MemOpQueueEntry {
133 int Offset;
134 unsigned Position;
135
136 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
137 : MI(&MI), Offset(Offset), Position(Position) {}
138 };
140
141
142
  // A run of queued memory operations that FormCandidates grouped together.
  // It also has an Instrs member (filled by FormCandidates, read by
  // MergeOpsUpdate) whose declaration is missing from this listing.
143 struct MergeCandidate {
144
146
147
  // Index into Instrs of the latest instruction of the run; MergeOpsUpdate
  // inserts the merged instruction right after it.
148 unsigned LatestMIIdx;
149
150
  // Index into Instrs of the earliest instruction of the run.
151 unsigned EarliestMIIdx;
152
153
154
  // Position of the latest instruction; used as the sort key for candidates
  // (see the LessThan lambda further down).
155 unsigned InsertPos;
156
157
  // Whether the run may be turned into a load/store-multiple.
158 bool CanMergeToLSMulti;
159
160
  // Whether the run may be turned into a load/store-double.
161 bool CanMergeToLSDouble;
162 };
166
172 unsigned Base, unsigned WordOffset,
176 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
178 ArrayRef<std::pair<unsigned, bool>> Regs,
182 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
184 ArrayRef<std::pair<unsigned, bool>> Regs,
186 void FormCandidates(const MemOpQueue &MemOps);
187 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
192 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
196 };
197
198}
199
200char ARMLoadStoreOpt::ID = 0;
201
203 false)
204
206 for (const auto &MO : MI.operands()) {
207 if (!MO.isReg())
208 continue;
209 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
210
211
212 return true;
213 }
214
215 return false;
216}
217
219 unsigned Opcode = MI.getOpcode();
220 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
221 unsigned NumOperands = MI.getDesc().getNumOperands();
222 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
223
224 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
225 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
226 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
227 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
228 return OffField;
229
230
231 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
232 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
233 return OffField * 4;
234
239
242
244}
245
247 return MI.getOperand(1);
248}
249
251 return MI.getOperand(0);
252}
253
255 switch (Opcode) {
257 case ARM::LDRi12:
258 ++NumLDMGened;
259 switch (Mode) {
265 }
266 case ARM::STRi12:
267 ++NumSTMGened;
268 switch (Mode) {
274 }
275 case ARM::tLDRi:
276 case ARM::tLDRspi:
277
278
279 ++NumLDMGened;
280 switch (Mode) {
283 }
284 case ARM::tSTRi:
285 case ARM::tSTRspi:
286
287 ++NumSTMGened;
288 switch (Mode) {
290 case ARM_AM::ia: return ARM::tSTMIA_UPD;
291 }
292 case ARM::t2LDRi8:
293 case ARM::t2LDRi12:
294 ++NumLDMGened;
295 switch (Mode) {
297 case ARM_AM::ia: return ARM::t2LDMIA;
298 case ARM_AM::db: return ARM::t2LDMDB;
299 }
300 case ARM::t2STRi8:
301 case ARM::t2STRi12:
302 ++NumSTMGened;
303 switch (Mode) {
305 case ARM_AM::ia: return ARM::t2STMIA;
306 case ARM_AM::db: return ARM::t2STMDB;
307 }
308 case ARM::VLDRS:
309 ++NumVLDMGened;
310 switch (Mode) {
312 case ARM_AM::ia: return ARM::VLDMSIA;
313 case ARM_AM::db: return 0;
314 }
315 case ARM::VSTRS:
316 ++NumVSTMGened;
317 switch (Mode) {
319 case ARM_AM::ia: return ARM::VSTMSIA;
320 case ARM_AM::db: return 0;
321 }
322 case ARM::VLDRD:
323 ++NumVLDMGened;
324 switch (Mode) {
326 case ARM_AM::ia: return ARM::VLDMDIA;
327 case ARM_AM::db: return 0;
328 }
329 case ARM::VSTRD:
330 ++NumVSTMGened;
331 switch (Mode) {
333 case ARM_AM::ia: return ARM::VSTMDIA;
334 case ARM_AM::db: return 0;
335 }
336 }
337}
338
340 switch (Opcode) {
342 case ARM::LDMIA_RET:
343 case ARM::LDMIA:
344 case ARM::LDMIA_UPD:
345 case ARM::STMIA:
346 case ARM::STMIA_UPD:
347 case ARM::tLDMIA:
348 case ARM::tLDMIA_UPD:
349 case ARM::tSTMIA_UPD:
350 case ARM::t2LDMIA_RET:
351 case ARM::t2LDMIA:
352 case ARM::t2LDMIA_UPD:
353 case ARM::t2STMIA:
354 case ARM::t2STMIA_UPD:
355 case ARM::VLDMSIA:
356 case ARM::VLDMSIA_UPD:
357 case ARM::VSTMSIA:
358 case ARM::VSTMSIA_UPD:
359 case ARM::VLDMDIA:
360 case ARM::VLDMDIA_UPD:
361 case ARM::VSTMDIA:
362 case ARM::VSTMDIA_UPD:
364
365 case ARM::LDMDA:
366 case ARM::LDMDA_UPD:
367 case ARM::STMDA:
368 case ARM::STMDA_UPD:
370
371 case ARM::LDMDB:
372 case ARM::LDMDB_UPD:
373 case ARM::STMDB:
374 case ARM::STMDB_UPD:
375 case ARM::t2LDMDB:
376 case ARM::t2LDMDB_UPD:
377 case ARM::t2STMDB:
378 case ARM::t2STMDB_UPD:
379 case ARM::VLDMSDB_UPD:
380 case ARM::VSTMSDB_UPD:
381 case ARM::VLDMDDB_UPD:
382 case ARM::VSTMDDB_UPD:
384
385 case ARM::LDMIB:
386 case ARM::LDMIB_UPD:
387 case ARM::STMIB:
388 case ARM::STMIB_UPD:
390 }
391}
392
394 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
395}
396
398 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
399}
400
403}
404
406 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
407}
408
410 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
411}
412
415}
416
418 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
419}
420
422 switch (Opc) {
424 case ARM::tLDRi:
425 case ARM::tSTRi:
426 case ARM::tLDRspi:
427 case ARM::tSTRspi:
428 return 1;
429 case ARM::tLDRHi:
430 case ARM::tSTRHi:
431 return 2;
432 case ARM::tLDRBi:
433 case ARM::tSTRBi:
434 return 4;
435 }
436}
437
439 switch (MI->getOpcode()) {
440 default: return 0;
441 case ARM::LDRi12:
442 case ARM::STRi12:
443 case ARM::tLDRi:
444 case ARM::tSTRi:
445 case ARM::tLDRspi:
446 case ARM::tSTRspi:
447 case ARM::t2LDRi8:
448 case ARM::t2LDRi12:
449 case ARM::t2STRi8:
450 case ARM::t2STRi12:
451 case ARM::VLDRS:
452 case ARM::VSTRS:
453 return 4;
454 case ARM::VLDRD:
455 case ARM::VSTRD:
456 return 8;
457 case ARM::LDMIA:
458 case ARM::LDMDA:
459 case ARM::LDMDB:
460 case ARM::LDMIB:
461 case ARM::STMIA:
462 case ARM::STMDA:
463 case ARM::STMDB:
464 case ARM::STMIB:
465 case ARM::tLDMIA:
466 case ARM::tLDMIA_UPD:
467 case ARM::tSTMIA_UPD:
468 case ARM::t2LDMIA:
469 case ARM::t2LDMDB:
470 case ARM::t2STMIA:
471 case ARM::t2STMDB:
472 case ARM::VLDMSIA:
473 case ARM::VSTMSIA:
474 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
475 case ARM::VLDMDIA:
476 case ARM::VSTMDIA:
477 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
478 }
479}
480
481
482
486 unsigned WordOffset,
488 unsigned PredReg) {
489 assert(isThumb1 && "Can only update base register uses for Thumb1!");
490
491
493 bool InsertSub = false;
494 unsigned Opc = MBBI->getOpcode();
495
496 if (MBBI->readsRegister(Base, nullptr)) {
498 bool IsLoad =
499 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
500 bool IsStore =
501 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
502
503 if (IsLoad || IsStore) {
504
505
506
507
509 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
510
512
513
515
516 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
518 else
519 InsertSub = true;
520 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
521 !definesCPSR(*MBBI)) {
522
523
524
526 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
527 Offset = (Opc == ARM::tSUBi8) ?
528 MO.getImm() + WordOffset * 4 :
529 MO.getImm() - WordOffset * 4 ;
530 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
531
532
534
535 return;
536 } else {
537 InsertSub = true;
538 }
539 } else {
540
541 InsertSub = true;
542 }
543 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
544
545
546
547 InsertSub = true;
548 }
549
550 if (InsertSub) {
551
555 .addImm(WordOffset * 4)
558 return;
559 }
560
561 if (MBBI->killsRegister(Base, nullptr) ||
562 MBBI->definesRegister(Base, nullptr))
563
564 return;
565 }
566
567
569
570
571
572
577 .addImm(WordOffset * 4)
580 }
581}
582
583
/// Return the first register in RegClass's order (as reported by
/// RegClassInfo.getOrder) that LiveRegs reports as available and that is not
/// reserved in the function's register info. Returns 0 when no such register
/// exists.
/// The visible caller (CreateLoadStoreMulti) calls moveLiveRegsBefore first so
/// that LiveRegs reflects the insertion point.
584unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  // Compute RegClassInfo lazily, once, and remember that it is valid.
585 if (!RegClassInfoValid) {
586 RegClassInfo.runOnMachineFunction(*MF);
587 RegClassInfoValid = true;
588 }
589
  // First register in order that is both not live and not reserved wins.
590 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
591 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
592 return Reg;
  // 0 signals "no free register"; the caller checks `NewBase == 0`.
593 return 0;
594}
595
596
597
598
601
602 if (!LiveRegsValid) {
603 LiveRegs.init(*TRI);
604 LiveRegs.addLiveOuts(MBB);
606 LiveRegsValid = true;
607 }
608
609 while (LiveRegPos != Before) {
610 --LiveRegPos;
611 LiveRegs.stepBackward(*LiveRegPos);
612 }
613}
614
616 unsigned Reg) {
617 for (const std::pair<unsigned, bool> &R : Regs)
618 if (R.first == Reg)
619 return true;
620 return false;
621}
622
623
624
625
/// Try to create one load/store-multiple style instruction covering the
/// registers in Regs (pairs of register and a flag; MergeOpsUpdate passes the
/// kill flag), addressed from Base and Offset. Every bail-out path visible
/// below returns nullptr.
/// NOTE(review): part of the parameter list and many statements (including
/// the final instruction construction and return) are missing from this
/// listing; the comments describe only the visible lines.
626MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
628 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
630 ArrayRef<std::pair<unsigned, bool>> Regs,
632 unsigned NumRegs = Regs.size();
634
635
636
637 bool SafeToClobberCPSR = !isThumb1 ||
640
  // Base-register writeback starts out enabled only for Thumb1.
641 bool Writeback = isThumb1;
642
643
644
645
  // NOTE(review): the condition guarding this group is missing from the
  // listing. Inside it, tLDRi disables writeback and tSTRi gives up.
647 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
648 if (Opcode == ARM::tLDRi)
649 Writeback = false;
650 else if (Opcode == ARM::tSTRi)
651 return nullptr;
652 }
653
655
  // The Offset == 4 and Offset == -4 * NumRegs + 4 cases below are only taken
  // for non-VFP opcodes outside Thumb1 and Thumb2.
657 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
658
  // Dispatch on the starting offset. The statements of the first three
  // branches are missing from this listing; the last branch handles any other
  // non-zero offset (and tLDRspi / tSTRspi) by computing a new base register.
659 if (Offset == 4 && haveIBAndDA) {
661 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
663 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
664
666 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
667
668
670
671
672
673
  // Give up when only two or fewer registers would be transferred.
674 if (NumRegs <= 2)
675 return nullptr;
676
677
678
  // Give up when CPSR must be preserved at this point.
679 if (!SafeToClobberCPSR)
680 return nullptr;
681
682 unsigned NewBase;
684
685
  // NOTE(review): the condition selecting this branch is missing. When taken,
  // the last register of the list doubles as the new base and writeback is
  // turned off.
686 NewBase = Regs[NumRegs-1].first;
687 Writeback = false;
688 } else {
689
  // Bring LiveRegs to the insertion point, mark every register in Regs as
  // live so findFreeReg cannot return one of them, then ask for a free
  // register from the Thumb1 low-register class or the general GPR class.
690 moveLiveRegsBefore(MBB, InsertBefore);
691
692
694 for (const std::pair<unsigned, bool> &R : Regs)
695 LiveRegs.addReg(R.first);
696
697 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
698 if (NewBase == 0)
699 return nullptr;
700 }
701
  // Pick the add opcode used to materialise the new base, depending on the
  // instruction set, whether Base is SP, and (Thumb1) the size of Offset.
702 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
703 : ARM::t2ADDri)
704 : (isThumb1 && Base == ARM::SP)
705 ? ARM::tADDrSPi
706 : (isThumb1 && Offset < 8)
707 ? ARM::tADDi3
708 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
709
711
712
  // NOTE(review): the guard for this block is missing from the listing; it
  // switches BaseOpc to the corresponding subtract opcode.
714 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
715 : ARM::t2SUBri)
716 : (isThumb1 && Offset < 8 && Base != ARM::SP)
717 ? ARM::tSUBi3
718 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
719 }
720
  // The offset must be encodable as an add immediate according to TL.
721 if (!TL->isLegalAddImmediate(Offset))
722
723 return nullptr;
724
725
726
727 bool KillOldBase = BaseKill &&
729
730 if (isThumb1) {
731
732
733
734
735
  // For tADDi8 / tSUBi8 with a different destination, Base is first copied
  // into NewBase (tMOVSr or tMOVr, see below) and then treated as NewBase.
736 if (Base != NewBase &&
737 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
738
740 !STI->hasV6Ops()) {
741
743 return nullptr;
744 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
746 } else
747 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
750
751
752 Base = NewBase;
753 KillOldBase = true;
754 }
755 if (BaseOpc == ARM::tADDrSPi) {
756 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
757 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
761 } else
762 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
767 } else {
768 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
773 }
  // From here on the freshly computed register is the base, and it is killed
  // by the merged instruction.
774 Base = NewBase;
775 BaseKill = true;
776 }
777
779
780
781
  // NOTE(review): Opcode is presumably reassigned on a line missing from this
  // listing; a zero opcode means no suitable instruction exists.
783 if (!Opcode)
784 return nullptr;
785
786
787
788
789
790
791
792
793
  // Thumb1 writeback with a base that stays live needs CPSR to be clobberable
  // (the base fix-up is done by UpdateBaseRegUses below); otherwise give up.
794 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
795 return nullptr;
796
798
799 if (Writeback) {
800 assert(isThumb1 && "expected Writeback only inThumb1");
  // Use the updating form of tLDMIA when writeback is requested.
801 if (Opcode == ARM::tLDMIA) {
803
804 Opcode = ARM::tLDMIA_UPD;
805 }
806
808
809
812
813
814
  // The base is still used afterwards: let UpdateBaseRegUses compensate,
  // passing NumRegs as its WordOffset argument.
815 if (!BaseKill)
816 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
817 } else {
818
821 }
822
824
825 for (const std::pair<unsigned, bool> &R : Regs)
827
829
831}
832
/// Create a Thumb2 double-word access (t2LDRDi8 for loads, t2STRDi8 for
/// stores) for exactly two registers.
/// NOTE(review): part of the parameter list, the definition of IsLoad, the
/// operand setup and the return statement are missing from this listing.
833MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
835 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
837 ArrayRef<std::pair<unsigned, bool>> Regs,
840 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
841 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
842
  // A double-word access always transfers two registers.
843 assert(Regs.size() == 2);
845 TII->get(LoadStoreOpcode));
846 if (IsLoad) {
849 } else {
852 }
856}
857
858
/// Merge the instructions of Cand into one load/store-double or
/// load/store-multiple instruction. Returns the merged instruction, or
/// nullptr when neither form could be created.
/// NOTE(review): many lines of this definition are missing from this listing
/// (local declarations, loop headers, and the code between the saved and
/// restored EarliestI); comments describe only what is visible.
859MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
861 unsigned Opcode = First->getOpcode();
867
  // Record each transferred register together with its kill flag, and track
  // the killed and used registers in their own sets.
871 bool IsKill = MO.isKill();
872 if (IsKill)
873 KilledRegs.insert(Reg);
874 Regs.push_back(std::make_pair(Reg, IsKill));
875 UsedRegs.insert(Reg);
876
877 if (IsLoad) {
878
879
880
883 continue;
886
888 continue;
889
890 if (MI->readsRegister(DefReg, nullptr))
891 continue;
893 }
894 }
895 }
896
897
899
  // The merged instruction goes immediately after the latest instruction of
  // the candidate.
900 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
901 iterator InsertBefore = std::next(iterator(LatestMI));
  // Prefer the double-word form; fall back to load/store-multiple when no
  // instruction was produced and the candidate allows it.
910 if (Cand.CanMergeToLSDouble)
911 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
912 Opcode, Pred, PredReg, DL, Regs,
913 Cand.Instrs);
914 if (!Merged && Cand.CanMergeToLSMulti)
915 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
916 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
917 if (!Merged)
918 return nullptr;
919
920
921
  // Remember the instruction just before the earliest one (or that the
  // earliest one is first in the block) so the start of the range can be
  // re-derived below. NOTE(review): presumably the original instructions are
  // removed on the lines missing in between - confirm against the full file.
922 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
923 bool EarliestAtBegin = false;
924 if (EarliestI == MBB.begin()) {
925 EarliestAtBegin = true;
926 } else {
927 EarliestI = std::prev(EarliestI);
928 }
929
930
933
934
935 if (EarliestAtBegin)
937 else
938 EarliestI = std::next(EarliestI);
  // Instructions from the old earliest position up to the merged instruction.
939 auto FixupRange = make_range(EarliestI, iterator(Merged));
940
942
943
  // An entry of ImpDefs is zeroed once a defining operand for that register
  // is found.
945 for (unsigned &ImpDefReg : ImpDefs) {
947 if (!MO.isReg() || MO.getReg() != ImpDefReg)
948 continue;
951 else if (MO.isDef())
952 ImpDefReg = 0;
953 }
954 }
955 }
956
958 for (unsigned ImpDef : ImpDefs)
960 } else {
961
  // Non-load candidates must be integer or VFP stores.
962 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
966 continue;
969 }
970 }
971 assert(ImpDefs.empty());
972 }
973
974 return Merged;
975}
976
979
980
981 return (Value % 4) == 0 && Value < 1024;
982}
983
984
985
988
989
990 unsigned Opcode = MI.getOpcode();
992 return true;
993
994
995
998 return true;
999 return false;
1000}
1001
1002
/// Split the queued memory operations in MemOps into MergeCandidates and
/// append them to Candidates. Starting at SIndex, a run is extended while the
/// next entry's offset equals `Offset + Size` and its register still fits the
/// load/store-multiple or load/store-double rules; the run is then recorded
/// and the scan continues after it.
/// The do/while body runs at least once, so MemOps is expected to be
/// non-empty; the visible callers check `MemOps.size() > 0` first.
/// NOTE(review): the definitions of FirstMI, isNotVFP, Size, PMO, PReg, MO and
/// Reg are on lines missing from this listing.
1003void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
1005 unsigned Opcode = FirstMI->getOpcode();
1008
1009 unsigned SIndex = 0;
1010 unsigned EIndex = MemOps.size();
1011 do {
1012
1014 int Offset = MemOps[SIndex].Offset;
  // Encoding number of the run's first register; an undef operand gets the
  // maximum value instead.
1017 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits::max()
1018 : TRI->getEncodingValue(PReg);
  // Latest / Earliest are indices into MemOps; Count is the run length.
1019 unsigned Latest = SIndex;
1020 unsigned Earliest = SIndex;
1021 unsigned Count = 1;
1022 bool CanMergeToLSDouble =
1024
1025
  // NOTE(review): the rest of this Cortex-M3 condition is missing from the
  // listing; when it holds for an i32 load the double form is disabled.
1026 if (STI->isCortexM3() && isi32Load(Opcode) &&
1028 CanMergeToLSDouble = false;
1029
1030 bool CanMergeToLSMulti = true;
1031
1032
  // On subtargets with slow odd registers, a VFP run may not start at an
  // odd-numbered register.
1033 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1034 CanMergeToLSMulti = false;
1035
1036
1037
  // SP or PC as the transferred register rules out both merged forms.
1038 if (PReg == ARM::SP || PReg == ARM::PC)
1039 CanMergeToLSMulti = CanMergeToLSDouble = false;
1040
1041
1043 CanMergeToLSMulti = CanMergeToLSDouble = false;
1044
1045
  // VLDRD / VSTRD runs are capped at 16 entries; other opcodes are uncapped.
1046 unsigned Limit;
1047 switch (Opcode) {
1048 default:
1049 Limit = UINT_MAX;
1050 break;
1051 case ARM::VLDRD:
1052 case ARM::VSTRD:
1053 Limit = 16;
1054 break;
1055 }
1056
1057
  // Extend the run one entry at a time.
1058 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  // The next entry must sit exactly Size bytes after the current offset.
1059 int NewOffset = MemOps[I].Offset;
1060 if (NewOffset != Offset + (int)Size)
1061 break;
1064 if (Reg == ARM::SP || Reg == ARM::PC)
1065 break;
1066 if (Count == Limit)
1067 break;
1068
1069
1070 unsigned RegNum = MO.isUndef() ? std::numeric_limits::max()
1071 : TRI->getEncodingValue(Reg);
1072 bool PartOfLSMulti = CanMergeToLSMulti;
1073 if (PartOfLSMulti) {
1074
  // Register numbers must be strictly increasing; for VFP opcodes they must
  // additionally be consecutive.
1075 if (RegNum <= PRegNum)
1076 PartOfLSMulti = false;
1077
1078
1079
1080 else if (!isNotVFP && RegNum != PRegNum+1)
1081 PartOfLSMulti = false;
1082 }
1083
  // The double form can only take this entry as the second of the run.
1084 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1085
  // Stop as soon as the entry fits neither form; otherwise narrow the run's
  // capabilities to what this entry still allows.
1086 if (!PartOfLSMulti && !PartOfLSDouble)
1087 break;
1088 CanMergeToLSMulti &= PartOfLSMulti;
1089 CanMergeToLSDouble &= PartOfLSDouble;
1090
1091
  // A smaller Position is treated as later, a larger one as earlier.
1092 unsigned Position = MemOps[I].Position;
1093 if (Position < MemOps[Latest].Position)
1094 Latest = I;
1095 else if (Position > MemOps[Earliest].Position)
1096 Earliest = I;
1097
1099 PRegNum = RegNum;
1100 }
1101
1102
  // Record the run. Indices stored in the candidate are relative to SIndex
  // because Instrs only holds the run's own instructions.
1103 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1104 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1105 Candidate->Instrs.push_back(MemOps[C].MI);
1106 Candidate->LatestMIIdx = Latest - SIndex;
1107 Candidate->EarliestMIIdx = Earliest - SIndex;
1108 Candidate->InsertPos = MemOps[Latest].Position;
  // A run of one instruction has nothing to merge with.
1109 if (Count == 1)
1110 CanMergeToLSMulti = CanMergeToLSDouble = false;
1111 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1112 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1113 Candidates.push_back(Candidate);
1114
  // Continue with the first entry that was not absorbed into this run.
1115 SIndex += Count;
1116 } while (SIndex < EIndex);
1117}
1118
1121 switch (Opc) {
1123 case ARM::LDMIA:
1124 case ARM::LDMDA:
1125 case ARM::LDMDB:
1126 case ARM::LDMIB:
1127 switch (Mode) {
1129 case ARM_AM::ia: return ARM::LDMIA_UPD;
1130 case ARM_AM::ib: return ARM::LDMIB_UPD;
1131 case ARM_AM::da: return ARM::LDMDA_UPD;
1132 case ARM_AM::db: return ARM::LDMDB_UPD;
1133 }
1134 case ARM::STMIA:
1135 case ARM::STMDA:
1136 case ARM::STMDB:
1137 case ARM::STMIB:
1138 switch (Mode) {
1140 case ARM_AM::ia: return ARM::STMIA_UPD;
1141 case ARM_AM::ib: return ARM::STMIB_UPD;
1142 case ARM_AM::da: return ARM::STMDA_UPD;
1143 case ARM_AM::db: return ARM::STMDB_UPD;
1144 }
1145 case ARM::t2LDMIA:
1146 case ARM::t2LDMDB:
1147 switch (Mode) {
1149 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1150 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1151 }
1152 case ARM::t2STMIA:
1153 case ARM::t2STMDB:
1154 switch (Mode) {
1156 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1157 case ARM_AM::db: return ARM::t2STMDB_UPD;
1158 }
1159 case ARM::VLDMSIA:
1160 switch (Mode) {
1162 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1163 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1164 }
1165 case ARM::VLDMDIA:
1166 switch (Mode) {
1168 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1169 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1170 }
1171 case ARM::VSTMSIA:
1172 switch (Mode) {
1174 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1175 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1176 }
1177 case ARM::VSTMDIA:
1178 switch (Mode) {
1180 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1181 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1182 }
1183 }
1184}
1185
1186
1187
1188
1191 bool CheckCPSRDef;
1192 int Scale;
1193 switch (MI.getOpcode()) {
1194 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1195 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1196 case ARM::t2SUBri:
1197 case ARM::t2SUBspImm:
1198 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1199 case ARM::t2ADDri:
1200 case ARM::t2ADDspImm:
1201 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1202 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1203 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1204 default: return 0;
1205 }
1206
1208 if (MI.getOperand(0).getReg() != Reg ||
1209 MI.getOperand(1).getReg() != Reg ||
1211 MIPredReg != PredReg)
1212 return 0;
1213
1214 if (CheckCPSRDef && definesCPSR(MI))
1215 return 0;
1216 return MI.getOperand(2).getImm() * Scale;
1217}
1218
1219
1227 if (MBBI == BeginMBBI)
1228 return EndMBBI;
1229
1230
1232 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1233 --PrevMBBI;
1234
1236 return Offset == 0 ? EndMBBI : PrevMBBI;
1237}
1238
1239
1248 while (NextMBBI != EndMBBI) {
1249
1250 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1251 ++NextMBBI;
1252 if (NextMBBI == EndMBBI)
1253 return EndMBBI;
1254
1256 if (Off) {
1258 return NextMBBI;
1259 }
1260
1261
1262
1263
1264
1265
1266 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1267 NextMBBI->definesRegister(Reg, TRI))
1268 return EndMBBI;
1269
1270 ++NextMBBI;
1271 }
1272 return EndMBBI;
1273}
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
/// Try to fold a neighbouring base-register increment/decrement into the
/// load/store-multiple MI (the debug output calls this "merge update").
/// Returns false when the transformation is not applied, true after the new
/// load/store has been added.
/// NOTE(review): most of this definition (base operand lookup, search for the
/// increment, opcode selection, instruction construction) is on lines missing
/// from this listing.
1287bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1288
  // Not attempted for Thumb1.
1289 if (isThumb1) return false;
1290 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1291
1294 bool BaseKill = BaseOP.isKill();
1297 unsigned Opcode = MI->getOpcode();
1299
1300
1301
1304 return false;
1305
1317 } else {
1321
1322
1323
1324
1325
  // This fallback path is only taken when optimising for minimum size and the
  // base register is killed by MI.
1326 if (!STI->hasMinSize() || !BaseKill)
1327 return false;
1328
  // Any operand register at or above R8 disqualifies the fallback.
1329 bool HighRegsUsed = false;
1331 if (MO.getReg() >= ARM::R8) {
1332 HighRegsUsed = true;
1333 break;
1334 }
1335
1336 if (!HighRegsUsed)
1338 else
1339 return false;
1340 }
1341 }
  // MergeInstr == MBB.end() means there is no separate increment to remove.
1342 if (MergeInstr != MBB.end()) {
1343 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1345 }
1346
1352
1353
1355 MIB.add(MO);
1356
1357
1359
1360 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1362 return true;
1363}
1364
1367 switch (Opc) {
1368 case ARM::LDRi12:
1369 return ARM::LDR_PRE_IMM;
1370 case ARM::STRi12:
1371 return ARM::STR_PRE_IMM;
1372 case ARM::VLDRS:
1373 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1374 case ARM::VLDRD:
1375 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1376 case ARM::VSTRS:
1377 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1378 case ARM::VSTRD:
1379 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1380 case ARM::t2LDRi8:
1381 case ARM::t2LDRi12:
1382 return ARM::t2LDR_PRE;
1383 case ARM::t2STRi8:
1384 case ARM::t2STRi12:
1385 return ARM::t2STR_PRE;
1387 }
1388}
1389
1392 switch (Opc) {
1393 case ARM::LDRi12:
1394 return ARM::LDR_POST_IMM;
1395 case ARM::STRi12:
1396 return ARM::STR_POST_IMM;
1397 case ARM::VLDRS:
1398 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1399 case ARM::VLDRD:
1400 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1401 case ARM::VSTRS:
1402 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1403 case ARM::VSTRD:
1404 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1405 case ARM::t2LDRi8:
1406 case ARM::t2LDRi12:
1407 return ARM::t2LDR_POST;
1408 case ARM::t2LDRBi8:
1409 case ARM::t2LDRBi12:
1410 return ARM::t2LDRB_POST;
1411 case ARM::t2LDRSBi8:
1412 case ARM::t2LDRSBi12:
1413 return ARM::t2LDRSB_POST;
1414 case ARM::t2LDRHi8:
1415 case ARM::t2LDRHi12:
1416 return ARM::t2LDRH_POST;
1417 case ARM::t2LDRSHi8:
1418 case ARM::t2LDRSHi12:
1419 return ARM::t2LDRSH_POST;
1420 case ARM::t2STRi8:
1421 case ARM::t2STRi12:
1422 return ARM::t2STR_POST;
1423 case ARM::t2STRBi8:
1424 case ARM::t2STRBi12:
1425 return ARM::t2STRB_POST;
1426 case ARM::t2STRHi8:
1427 case ARM::t2STRHi12:
1428 return ARM::t2STRH_POST;
1429
1430 case ARM::MVE_VLDRBS16:
1431 return ARM::MVE_VLDRBS16_post;
1432 case ARM::MVE_VLDRBS32:
1433 return ARM::MVE_VLDRBS32_post;
1434 case ARM::MVE_VLDRBU16:
1435 return ARM::MVE_VLDRBU16_post;
1436 case ARM::MVE_VLDRBU32:
1437 return ARM::MVE_VLDRBU32_post;
1438 case ARM::MVE_VLDRHS32:
1439 return ARM::MVE_VLDRHS32_post;
1440 case ARM::MVE_VLDRHU32:
1441 return ARM::MVE_VLDRHU32_post;
1442 case ARM::MVE_VLDRBU8:
1443 return ARM::MVE_VLDRBU8_post;
1444 case ARM::MVE_VLDRHU16:
1445 return ARM::MVE_VLDRHU16_post;
1446 case ARM::MVE_VLDRWU32:
1447 return ARM::MVE_VLDRWU32_post;
1448 case ARM::MVE_VSTRB16:
1449 return ARM::MVE_VSTRB16_post;
1450 case ARM::MVE_VSTRB32:
1451 return ARM::MVE_VSTRB32_post;
1452 case ARM::MVE_VSTRH32:
1453 return ARM::MVE_VSTRH32_post;
1454 case ARM::MVE_VSTRBU8:
1455 return ARM::MVE_VSTRBU8_post;
1456 case ARM::MVE_VSTRHU16:
1457 return ARM::MVE_VSTRHU16_post;
1458 case ARM::MVE_VSTRWU32:
1459 return ARM::MVE_VSTRWU32_post;
1460
1462 }
1463}
1464
1465
1466
/// Try to fold a neighbouring base-register increment/decrement into the
/// single load/store MI by switching to an updating opcode. Returns false
/// when the fold is not applied and true once the new instruction has been
/// added.
/// NOTE(review): the base/offset lookup, opcode selection and all BuildMI
/// operand chains are on lines missing from this listing.
1467bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1468
1469
  // Not attempted for Thumb1.
1470 if (isThumb1) return false;
1471 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1472
1475 unsigned Opcode = MI->getOpcode();
  // Classify the opcode: VFP loads/stores (isAM5) versus ARM-mode word
  // loads/stores (isAM2).
1477 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1478 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1479 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only instructions whose immediate operand 2 is zero are candidates.
1481 if (MI->getOperand(2).getImm() != 0)
1482 return false;
1484 return false;
1485
1486
1487
  // Operand 0 being the base register itself rules the fold out.
1488 if (MI->getOperand(0).getReg() == Base)
1489 return false;
1490
1499 unsigned NewOpc;
  // Choose between the alternatives depending on how the found Offset relates
  // to the access size Bytes; the assignments to NewOpc are on missing lines.
1500 if (!isAM5 && Offset == Bytes) {
1502 } else if (Offset == -Bytes) {
1504 } else {
1506 if (MergeInstr == MBB.end())
1507 return false;
1508
1510 if ((isAM5 && Offset != Bytes) ||
1514 return false;
1515 }
1516 }
1517 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1519
1521
  // Build the replacement; operand layout differs per addressing class. In the
  // visible lines MIB is only used inside LLVM_DEBUG, which is presumably why
  // each branch also has `(void)MIB;`.
1523 if (isAM5) {
1524
1525
1526
1527
1537 (void)MIB;
1538 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1539 } else if (isLd) {
1540 if (isAM2) {
1541
1542 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1543 auto MIB =
1551 (void)MIB;
1552 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1553 } else {
1555 auto MIB =
1563 (void)MIB;
1564 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1565 }
1566 } else {
1567
1568 auto MIB =
1575 (void)MIB;
1576 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1577 }
1578 } else {
1580
1581
1582
1583 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1585
1593 (void)MIB;
1594 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1595 } else {
1596
1603 (void)MIB;
1604 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1605 }
1606 }
1608
1609 return true;
1610}
1611
/// Try to fold a neighbouring base-register update into the t2LDRDi8 /
/// t2STRDi8 instruction MI by switching to the matching _PRE or _POST opcode.
/// Returns false when the fold is not applied and true once the new
/// load/store has been added.
/// NOTE(review): the base operand lookup, the search for the increment and
/// the instruction construction are on lines missing from this listing.
1612bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1613 unsigned Opcode = MI.getOpcode();
1614 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1615 "Must have t2STRDi8 or t2LDRDi8");
  // Only instructions whose immediate operand 3 is zero are candidates.
1616 if (MI.getOperand(3).getImm() != 0)
1617 return false;
1618 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1619
1620
1621
1627 return false;
1628
1636 unsigned NewOpc;
  // NOTE(review): the condition selecting between the two branches is
  // missing; the first picks the _PRE opcode, the second the _POST opcode.
1638 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1639 } else {
1641 if (MergeInstr == MBB.end())
1642 return false;
1643 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1645 return false;
1646 }
1647 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1649
1652 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1654 } else {
1655 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1657 }
  // The updating opcode is expected to declare exactly one more operand than
  // the original (7 versus 6).
1660 assert(TII->get(Opcode).getNumOperands() == 6 &&
1661 TII->get(NewOpc).getNumOperands() == 7 &&
1662 "Unexpected number of operands in Opcode specification.");
1663
1664
1666 MIB.add(MO);
1668
1669 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1671 return true;
1672}
1673
1674
1675
1677 unsigned Opcode = MI.getOpcode();
1678 switch (Opcode) {
1679 case ARM::VLDRS:
1680 case ARM::VSTRS:
1681 case ARM::VLDRD:
1682 case ARM::VSTRD:
1683 case ARM::LDRi12:
1684 case ARM::STRi12:
1685 case ARM::tLDRi:
1686 case ARM::tSTRi:
1687 case ARM::tLDRspi:
1688 case ARM::tSTRspi:
1689 case ARM::t2LDRi8:
1690 case ARM::t2LDRi12:
1691 case ARM::t2STRi8:
1692 case ARM::t2STRi12:
1693 break;
1694 default:
1695 return false;
1696 }
1697 if (.getOperand(1).isReg())
1698 return false;
1699
1700
1701
1702 if (.hasOneMemOperand())
1703 return false;
1704
1706
1707
1708
1709
1711 return false;
1712
1713
1714
1716 return false;
1717
1718
1719
1720
1721 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1722 return false;
1723
1724
1725 if (MI.getOperand(1).isUndef())
1726 return false;
1727
1728 return true;
1729}
1730
1733 bool isDef, unsigned NewOpc, unsigned Reg,
1734 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1738 if (isDef) {
1740 TII->get(NewOpc))
1744
1745
1747 } else {
1749 TII->get(NewOpc))
1753
1754
1756 }
1757}
1758
1762 unsigned Opcode = MI->getOpcode();
1763
1764
1765 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1766 return false;
1767
1770 Register EvenReg = MI->getOperand(0).getReg();
1771 Register OddReg = MI->getOperand(1).getReg();
1772 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1773 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1774
1775
1776
1777 bool Errata602117 = EvenReg == BaseReg &&
1778 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1779
1780 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1781 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1782
1783 if (!Errata602117 && !NonConsecutiveRegs)
1784 return false;
1785
1786 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1787 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1788 bool EvenDeadKill = isLd ?
1789 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1790 bool EvenUndef = MI->getOperand(0).isUndef();
1791 bool OddDeadKill = isLd ?
1792 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1793 bool OddUndef = MI->getOperand(1).isUndef();
1794 bool BaseKill = BaseOp.isKill();
1795 bool BaseUndef = BaseOp.isUndef();
1796 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1797 "register offset not handled below");
1801
1802 if (OddRegNum > EvenRegNum && OffImm == 0) {
1803
1804
1805 unsigned NewOpc = (isLd)
1806 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1807 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1808 if (isLd) {
1815 ++NumLDRD2LDM;
1816 } else {
1825 ++NumSTRD2STM;
1826 }
1827 } else {
1828
1829 unsigned NewOpc = (isLd)
1830 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1831 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1832
1833
1834 unsigned NewOpc2 = (isLd)
1835 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1836 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1837
1838
1839 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1840 assert(->regsOverlap(OddReg, BaseReg));
1842 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1844 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1845 MI);
1846 } else {
1847 if (OddReg == EvenReg && EvenDeadKill) {
1848
1849
1850
1851 EvenDeadKill = false;
1852 OddDeadKill = true;
1853 }
1854
1855 if (EvenReg == BaseReg)
1856 EvenDeadKill = false;
1858 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1859 MI);
1861 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1862 MI);
1863 }
1864 if (isLd)
1865 ++NumLDRD2LDR;
1866 else
1867 ++NumSTRD2STR;
1868 }
1869
1871 return true;
1872}
1873
1874
1875
1877 MemOpQueue MemOps;
1878 unsigned CurrBase = 0;
1879 unsigned CurrOpc = ~0u;
1881 unsigned Position = 0;
1882 assert(Candidates.size() == 0);
1883 assert(MergeBaseCandidates.size() == 0);
1884 LiveRegsValid = false;
1885
1888
1890 if (FixInvalidRegPairOp(MBB, MBBI))
1891 continue;
1892 ++Position;
1893
1895 unsigned Opcode = MBBI->getOpcode();
1902 if (CurrBase == 0) {
1903
1904 CurrBase = Base;
1905 CurrOpc = Opcode;
1906 CurrPred = Pred;
1907 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1908 continue;
1909 }
1910
1911 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1912
1913
1914
1915
1916
1917
1918
1919 bool Overlap = false;
1922 if (!Overlap) {
1923 for (const MemOpQueueEntry &E : MemOps) {
1924 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1925 Overlap = true;
1926 break;
1927 }
1928 }
1929 }
1930 }
1931
1932 if (!Overlap) {
1933
1934 if (Offset > MemOps.back().Offset) {
1935 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1936 continue;
1937 } else {
1938 MemOpQueue::iterator MI, ME;
1939 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1940 if (Offset < MI->Offset) {
1941
1942 break;
1943 }
1945
1946 MI = ME;
1947 break;
1948 }
1949 }
1950 if (MI != MemOps.end()) {
1951 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1952 continue;
1953 }
1954 }
1955 }
1956 }
1957
1958
1960 --Position;
1961
1962 } else if (MBBI->isDebugInstr()) {
1963 continue;
1964 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1965 MBBI->getOpcode() == ARM::t2STRDi8) {
1966
1967
1968 MergeBaseCandidates.push_back(&*MBBI);
1969 }
1970
1971
1972 if (MemOps.size() > 0) {
1973 FormCandidates(MemOps);
1974
1975 CurrBase = 0;
1976 CurrOpc = ~0u;
1978 MemOps.clear();
1979 }
1980 }
1981 if (MemOps.size() > 0)
1982 FormCandidates(MemOps);
1983
1984
1985
1986 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1987 return M0->InsertPos < M1->InsertPos;
1988 };
1990
1991
1992 bool Changed = false;
1993 for (const MergeCandidate *Candidate : Candidates) {
1994 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1995 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1996
1997 if (Merged) {
1998 Changed = true;
1999 unsigned Opcode = Merged->getOpcode();
2000 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2001 MergeBaseUpdateLSDouble(*Merged);
2002 else
2003 MergeBaseUpdateLSMultiple(Merged);
2004 } else {
2006 if (MergeBaseUpdateLoadStore(MI))
2007 Changed = true;
2008 }
2009 }
2010 } else {
2011 assert(Candidate->Instrs.size() == 1);
2012 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2013 Changed = true;
2014 }
2015 }
2016 Candidates.clear();
2017
2019 MergeBaseUpdateLSDouble(*MI);
2020 MergeBaseCandidates.clear();
2021
2022 return Changed;
2023}
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2036
2037 if (isThumb1) return false;
2038 if (MBB.empty()) return false;
2039
2042 (MBBI->getOpcode() == ARM::BX_RET ||
2043 MBBI->getOpcode() == ARM::tBX_RET ||
2044 MBBI->getOpcode() == ARM::MOVPCLR)) {
2046
2047 while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2048 --PrevI;
2050 unsigned Opcode = PrevMI.getOpcode();
2051 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2052 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2053 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2055 if (MO.getReg() != ARM::LR)
2056 return false;
2057 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2058 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2059 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
2064 return true;
2065 }
2066 }
2067 return false;
2068}
2069
2073 MBBI->getOpcode() != ARM::tBX_RET)
2074 return false;
2075
2077 --Prev;
2078 if (Prev->getOpcode() != ARM::tMOVr ||
2079 !Prev->definesRegister(ARM::LR, nullptr))
2080 return false;
2081
2082 for (auto Use : Prev->uses())
2083 if (Use.isKill()) {
2084 assert(STI->hasV4TOps());
2091 return true;
2092 }
2093
2094 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2095}
2096
2097bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2099 return false;
2100
2101 MF = &Fn;
2105 TII = STI->getInstrInfo();
2106 TRI = STI->getRegisterInfo();
2107
2108 RegClassInfoValid = false;
2109 isThumb2 = AFI->isThumb2Function();
2110 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2111
2112 bool Modified = false, ModifiedLDMReturn = false;
2115 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2116 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2117 if (isThumb1)
2119 }
2120 Modified |= ModifiedLDMReturn;
2121
2122
2123
2124
2125
2126 if (ModifiedLDMReturn)
2128
2131}
2132
2133#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2134 "ARM pre- register allocation load / store optimization pass"
2135
2136namespace {
2137
2138
2139
2141 static char ID;
2142
2151
2153
2155
2158 }
2159
2165 }
2166
2167 private:
2172 bool RescheduleOps(
2177 bool DistributeIncrements();
2179 };
2180
2181}
2182
2183char ARMPreAllocLoadStoreOpt::ID = 0;
2184
2190
2191
2192
2194 cl::init(8), cl::Hidden);
2195
2196bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2198 return false;
2199
2200 TD = &Fn.getDataLayout();
2202 TII = STI->getInstrInfo();
2203 TRI = STI->getRegisterInfo();
2204 MRI = &Fn.getRegInfo();
2205 DT = &getAnalysis().getDomTree();
2206 MF = &Fn;
2207 AA = &getAnalysis().getAAResults();
2208
2209 bool Modified = DistributeIncrements();
2211 Modified |= RescheduleLoadStoreInstrs(&MFI);
2212
2214}
2215
2223
2225 while (++I != E) {
2226 if (I->isDebugInstr() || MemOps.count(&*I))
2227 continue;
2228 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2229 return false;
2230 if (I->mayStore() || (!isLd && I->mayLoad()))
2232 if (I->mayAlias(AA, *MemOp, false))
2233 return false;
2234 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2236 if (!MO.isReg())
2237 continue;
2239 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2240 return false;
2241 if (Reg != Base && !MemRegs.count(Reg))
2242 AddedRegPressure.insert(Reg);
2243 }
2244 }
2245
2246
2247 if (MemRegs.size() <= 4)
2248
2249 return true;
2250 return AddedRegPressure.size() <= MemRegs.size() * 2;
2251}
2252
2253bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2257
2258 if (!STI->hasV5TEOps())
2259 return false;
2260
2261
2262 unsigned Scale = 1;
2263 unsigned Opcode = Op0->getOpcode();
2264 if (Opcode == ARM::LDRi12) {
2265 NewOpc = ARM::LDRD;
2266 } else if (Opcode == ARM::STRi12) {
2267 NewOpc = ARM::STRD;
2268 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2269 NewOpc = ARM::t2LDRDi8;
2270 Scale = 4;
2271 isT2 = true;
2272 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2273 NewOpc = ARM::t2STRDi8;
2274 Scale = 4;
2275 isT2 = true;
2276 } else {
2277 return false;
2278 }
2279
2280
2281
2282
2286 return false;
2287
2289 Align ReqAlign = STI->getDualLoadStoreAlignment();
2290 if (Alignment < ReqAlign)
2291 return false;
2292
2293
2295 if (isT2) {
2296 int Limit = (1 << 8) * Scale;
2297 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2298 return false;
2300 } else {
2302 if (OffImm < 0) {
2304 OffImm = - OffImm;
2305 }
2306 int Limit = (1 << 8) * Scale;
2307 if (OffImm >= Limit || (OffImm & (Scale-1)))
2308 return false;
2310 }
2313 if (FirstReg == SecondReg)
2314 return false;
2318 return true;
2319}
2320
2321bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2325 bool RetVal = false;
2326
2327
2331 assert(LHS == RHS || LOffset != ROffset);
2332 return LOffset > ROffset;
2333 });
2334
2335
2336
2337
2338
2339 while (Ops.size() > 1) {
2340 unsigned FirstLoc = ~0U;
2341 unsigned LastLoc = 0;
2344 int LastOffset = 0;
2345 unsigned LastOpcode = 0;
2346 unsigned LastBytes = 0;
2347 unsigned NumMove = 0;
2349
2350 unsigned LSMOpcode
2352 if (LastOpcode && LSMOpcode != LastOpcode)
2353 break;
2354
2355
2358 if (LastBytes) {
2359 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2360 break;
2361 }
2362
2363
2365 break;
2366
2367
2368 ++NumMove;
2369 LastOffset = Offset;
2370 LastBytes = Bytes;
2371 LastOpcode = LSMOpcode;
2372
2373 unsigned Loc = MI2LocMap[Op];
2374 if (Loc <= FirstLoc) {
2375 FirstLoc = Loc;
2376 FirstOp = Op;
2377 }
2378 if (Loc >= LastLoc) {
2379 LastLoc = Loc;
2380 LastOp = Op;
2381 }
2382 }
2383
2384 if (NumMove <= 1)
2386 else {
2389 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2390 MemOps.insert(Ops[i]);
2391 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2392 }
2393
2394
2395
2396 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4;
2397 if (DoMove)
2399 MemOps, MemRegs, TRI, AA);
2400 if (!DoMove) {
2401 for (unsigned i = 0; i != NumMove; ++i)
2403 } else {
2404
2406 while (InsertPos != MBB->end() &&
2407 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2408 ++InsertPos;
2409
2410
2411
2414 Register FirstReg, SecondReg;
2417 bool isT2 = false;
2418 unsigned NewOpc = 0;
2421 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2422 FirstReg, SecondReg, BaseReg,
2423 Offset, PredReg, Pred, isT2)) {
2426
2429 MRI->constrainRegClass(FirstReg, TRC);
2430 MRI->constrainRegClass(SecondReg, TRC);
2431
2432
2433 if (isLd) {
2438
2439
2440
2441 if (!isT2)
2446 ++NumLDRDFormed;
2447 } else {
2452
2453
2454
2455 if (!isT2)
2460 ++NumSTRDFormed;
2461 }
2464
2465 if (!isT2) {
2466
2469 }
2470 } else {
2471 for (unsigned i = 0; i != NumMove; ++i) {
2473 if (isLd) {
2474
2476 RegisterMap[Reg];
2477 }
2478
2480 }
2481 }
2482
2483 NumLdStMoved += NumMove;
2484 RetVal = true;
2485 }
2486 }
2487 }
2488
2489 return RetVal;
2490}
2491
// Body of forEachDbgRegOperand(MachineInstr *MI, std::function<...> Fn); the
// signature lines are not present in this listing.
//
// Calls Fn on register operands of MI:
//  - if MI->isNonListDebugValue() is true, only operand 0 is considered;
//  - otherwise every operand at index 2 and above is considered.
// Operands for which isReg() is false are skipped in both cases.
2494 if (MI->isNonListDebugValue()) {
2495 auto &Op = MI->getOperand(0);
2496 if (Op.isReg())
2497 Fn(Op);
2498 } else {
     // NOTE(review): starting at index 2 presumably skips the leading
     // non-location operands of a list-style debug value -- confirm.
2499 for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2500 auto &Op = MI->getOperand(I);
2501 if (Op.isReg())
2502 Fn(Op);
2503 }
2504 }
2505}
2506
2507
2508
2512
2514 auto RegIt = RegisterMap.find(Op.getReg());
2515 if (RegIt == RegisterMap.end())
2516 return;
2517 auto &InstrVec = RegIt->getSecond();
2518 llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2519 });
2520}
2521
// Body of createDebugVariableFromMachineInstr(MachineInstr *MI); the signature
// line is not present in this listing.
//
// Builds a DebugVariable from three pieces read off MI: its debug variable,
// its debug expression, and the inlined-at of its debug location, and returns
// it by value.
// NOTE(review): MI->getDebugLoc() is dereferenced without a check, so this
// presumably expects MI to carry a valid debug location -- confirm at callers.
2523 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2524 MI->getDebugLoc()->getInlinedAt());
2525 return DbgVar;
2526}
2527
2528bool
2530 bool RetVal = false;
2531
2535 Base2InstMap Base2LdsMap;
2536 Base2InstMap Base2StsMap;
2537 BaseVec LdBases;
2538 BaseVec StBases;
2539
2540
2541
2543
2544 unsigned Loc = 0;
2547 while (MBBI != E) {
2550 if (MI.isCall() || MI.isTerminator()) {
2551
2553 break;
2554 }
2555
2556 if (.isDebugInstr())
2557 MI2LocMap[&MI] = ++Loc;
2558
2560 continue;
2563 continue;
2564
2565 int Opc = MI.getOpcode();
2569 bool StopHere = false;
2570 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2571 auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2572 if (Inserted) {
2573 BI->second.push_back(&MI);
2574 Bases.push_back(Base);
2575 return;
2576 }
2579 StopHere = true;
2580 break;
2581 }
2582 }
2583 if (!StopHere)
2584 BI->second.push_back(&MI);
2585 };
2586
2587 if (isLd)
2588 FindBases(Base2LdsMap, LdBases);
2589 else
2590 FindBases(Base2StsMap, StBases);
2591
2592 if (StopHere) {
2593
2594
2595 --Loc;
2596 break;
2597 }
2598 }
2599
2600
2601 for (unsigned Base : LdBases) {
2603 if (Lds.size() > 1)
2604 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2605 }
2606
2607
2608 for (unsigned Base : StBases) {
2610 if (Sts.size() > 1)
2611 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2612 }
2613
2614 if (MBBI != E) {
2615 Base2LdsMap.clear();
2616 Base2StsMap.clear();
2617 LdBases.clear();
2618 StBases.clear();
2619 }
2620 }
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2773
2774
2778
2779 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2780 auto RegIt = RegisterMap.find(Reg);
2781 if (RegIt == RegisterMap.end())
2782 return;
2783 auto &InstrVec = RegIt->getSecond();
2784 InstrVec.push_back(&MI);
2785 InstrMap[&MI].push_back(Reg);
2786 };
2787
2788 if (MI.isDebugValue()) {
2789 assert(MI.getDebugVariable() &&
2790 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2791
2793
2794
2795
2796
2798 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2799 });
2800
2801
2802
2803
2804
2805
2806 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2807 if (InstrIt != DbgValueSinkCandidates.end()) {
2808 auto *Instr = InstrIt->getSecond();
2809 auto RegIt = InstrMap.find(Instr);
2810 if (RegIt != InstrMap.end()) {
2811 const auto &RegVec = RegIt->getSecond();
2812
2813
2814 for (auto &Reg : RegVec) {
2815 auto RegIt = RegisterMap.find(Reg);
2816 if (RegIt == RegisterMap.end())
2817 continue;
2818 auto &InstrVec = RegIt->getSecond();
2821 return Var == DbgVar;
2822 };
2823
2825 }
2828 }
2829 }
2830 DbgValueSinkCandidates[DbgVar] = &MI;
2831 } else {
2832
2833
2834 auto Opc = MI.getOpcode();
2836 continue;
2837 auto Reg = MI.getOperand(0).getReg();
2838 auto RegIt = RegisterMap.find(Reg);
2839 if (RegIt == RegisterMap.end())
2840 continue;
2841 auto &DbgInstrVec = RegIt->getSecond();
2842 if (!DbgInstrVec.size())
2843 continue;
2844 for (auto *DbgInstr : DbgInstrVec) {
2846 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2847 MBB->insert(InsertPos, ClonedMI);
2849
2850
2852 auto DbgIt = DbgValueSinkCandidates.find(DbgVar);
2853
2854
2855
2856 if (DbgIt != DbgValueSinkCandidates.end())
2857 DbgValueSinkCandidates.erase(DbgIt);
2858
2861
2862
2863 if (DbgInstr->isDebugValueList())
2865 DbgInstr);
2866 }
2867 }
2868 }
2869 return RetVal;
2870}
2871
2872
2873
2874
2875
// Body of getBaseOperandIndex(MachineInstr &MI); the signature line is not
// present in this listing.
//
// Maps MI's opcode to an operand index:
//    1  for the MVE_VLDR*/MVE_VSTR* opcodes without a _pre/_post suffix and
//       for the t2LDR{B,SB,H,SH} / t2STR{B,H} i8 and i12 opcodes;
//    2  for the MVE_VLDR*/MVE_VSTR* opcodes with a _post or _pre suffix;
//   -1  for every opcode not listed.
// NOTE(review): elsewhere in this listing the result is used as
// getOperand(BaseOp) (read as a register) and getOperand(BaseOp + 1) (read as
// an immediate), so the index presumably names the base-register operand with
// the offset immediate right after it -- confirm.
2877 switch (MI.getOpcode()) {
2878 case ARM::MVE_VLDRBS16:
2879 case ARM::MVE_VLDRBS32:
2880 case ARM::MVE_VLDRBU16:
2881 case ARM::MVE_VLDRBU32:
2882 case ARM::MVE_VLDRHS32:
2883 case ARM::MVE_VLDRHU32:
2884 case ARM::MVE_VLDRBU8:
2885 case ARM::MVE_VLDRHU16:
2886 case ARM::MVE_VLDRWU32:
2887 case ARM::MVE_VSTRB16:
2888 case ARM::MVE_VSTRB32:
2889 case ARM::MVE_VSTRH32:
2890 case ARM::MVE_VSTRBU8:
2891 case ARM::MVE_VSTRHU16:
2892 case ARM::MVE_VSTRWU32:
2893 case ARM::t2LDRHi8:
2894 case ARM::t2LDRHi12:
2895 case ARM::t2LDRSHi8:
2896 case ARM::t2LDRSHi12:
2897 case ARM::t2LDRBi8:
2898 case ARM::t2LDRBi12:
2899 case ARM::t2LDRSBi8:
2900 case ARM::t2LDRSBi12:
2901 case ARM::t2STRBi8:
2902 case ARM::t2STRBi12:
2903 case ARM::t2STRHi8:
2904 case ARM::t2STRHi12:
2905 return 1;
     // The _post and _pre forms share one result.
2906 case ARM::MVE_VLDRBS16_post:
2907 case ARM::MVE_VLDRBS32_post:
2908 case ARM::MVE_VLDRBU16_post:
2909 case ARM::MVE_VLDRBU32_post:
2910 case ARM::MVE_VLDRHS32_post:
2911 case ARM::MVE_VLDRHU32_post:
2912 case ARM::MVE_VLDRBU8_post:
2913 case ARM::MVE_VLDRHU16_post:
2914 case ARM::MVE_VLDRWU32_post:
2915 case ARM::MVE_VSTRB16_post:
2916 case ARM::MVE_VSTRB32_post:
2917 case ARM::MVE_VSTRH32_post:
2918 case ARM::MVE_VSTRBU8_post:
2919 case ARM::MVE_VSTRHU16_post:
2920 case ARM::MVE_VSTRWU32_post:
2921 case ARM::MVE_VLDRBS16_pre:
2922 case ARM::MVE_VLDRBS32_pre:
2923 case ARM::MVE_VLDRBU16_pre:
2924 case ARM::MVE_VLDRBU32_pre:
2925 case ARM::MVE_VLDRHS32_pre:
2926 case ARM::MVE_VLDRHU32_pre:
2927 case ARM::MVE_VLDRBU8_pre:
2928 case ARM::MVE_VLDRHU16_pre:
2929 case ARM::MVE_VLDRWU32_pre:
2930 case ARM::MVE_VSTRB16_pre:
2931 case ARM::MVE_VSTRB32_pre:
2932 case ARM::MVE_VSTRH32_pre:
2933 case ARM::MVE_VSTRBU8_pre:
2934 case ARM::MVE_VSTRHU16_pre:
2935 case ARM::MVE_VSTRWU32_pre:
2936 return 2;
2937 }
     // Opcode is not one of the forms handled above.
2938 return -1;
2939}
2940
// Body of isPostIndex(MachineInstr &MI); the signature line is not present in
// this listing.
//
// Returns true exactly when MI's opcode is one of the MVE_VLDR*/MVE_VSTR*
// opcodes carrying the _post suffix listed below, and false for everything
// else (including the _pre forms and all non-MVE opcodes).
2942 switch (MI.getOpcode()) {
2943 case ARM::MVE_VLDRBS16_post:
2944 case ARM::MVE_VLDRBS32_post:
2945 case ARM::MVE_VLDRBU16_post:
2946 case ARM::MVE_VLDRBU32_post:
2947 case ARM::MVE_VLDRHS32_post:
2948 case ARM::MVE_VLDRHU32_post:
2949 case ARM::MVE_VLDRBU8_post:
2950 case ARM::MVE_VLDRHU16_post:
2951 case ARM::MVE_VLDRWU32_post:
2952 case ARM::MVE_VSTRB16_post:
2953 case ARM::MVE_VSTRB32_post:
2954 case ARM::MVE_VSTRH32_post:
2955 case ARM::MVE_VSTRBU8_post:
2956 case ARM::MVE_VSTRHU16_post:
2957 case ARM::MVE_VSTRWU32_post:
2958 return true;
2959 }
2960 return false;
2961}
2962
// Body of isPreIndex(MachineInstr &MI); the signature line is not present in
// this listing.
//
// Returns true exactly when MI's opcode is one of the MVE_VLDR*/MVE_VSTR*
// opcodes carrying the _pre suffix listed below, and false for everything
// else (including the _post forms and all non-MVE opcodes).
2964 switch (MI.getOpcode()) {
2965 case ARM::MVE_VLDRBS16_pre:
2966 case ARM::MVE_VLDRBS32_pre:
2967 case ARM::MVE_VLDRBU16_pre:
2968 case ARM::MVE_VLDRBU32_pre:
2969 case ARM::MVE_VLDRHS32_pre:
2970 case ARM::MVE_VLDRHU32_pre:
2971 case ARM::MVE_VLDRBU8_pre:
2972 case ARM::MVE_VLDRHU16_pre:
2973 case ARM::MVE_VLDRWU32_pre:
2974 case ARM::MVE_VSTRB16_pre:
2975 case ARM::MVE_VSTRB32_pre:
2976 case ARM::MVE_VSTRH32_pre:
2977 case ARM::MVE_VSTRBU8_pre:
2978 case ARM::MVE_VSTRHU16_pre:
2979 case ARM::MVE_VSTRWU32_pre:
2980 return true;
2981 }
2982 return false;
2983}
2984
2985
2986
2987
2988
2989
2992 int &CodesizeEstimate) {
2994 return true;
2995
2996
3001 CodesizeEstimate += 1;
3002 return Imm < 0 && -Imm < ((1 << 8) * 1);
3003 }
3004 return false;
3005}
3006
3007
3008
3009
3013
3015 MI->getOperand(BaseOp).setReg(NewBaseReg);
3016
3021 MRI.constrainRegClass(NewBaseReg, TRC);
3022
3023 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3025 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3026 else {
3027 unsigned ConvOpcode;
3028 switch (MI->getOpcode()) {
3029 case ARM::t2LDRHi12:
3030 ConvOpcode = ARM::t2LDRHi8;
3031 break;
3032 case ARM::t2LDRSHi12:
3033 ConvOpcode = ARM::t2LDRSHi8;
3034 break;
3035 case ARM::t2LDRBi12:
3036 ConvOpcode = ARM::t2LDRBi8;
3037 break;
3038 case ARM::t2LDRSBi12:
3039 ConvOpcode = ARM::t2LDRSBi8;
3040 break;
3041 case ARM::t2STRHi12:
3042 ConvOpcode = ARM::t2STRHi8;
3043 break;
3044 case ARM::t2STRBi12:
3045 ConvOpcode = ARM::t2STRBi8;
3046 break;
3047 default:
3049 }
3051 "Illegal Address Immediate after convert!");
3052
3054 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3061 MI->eraseFromParent();
3062 }
3063}
3064
3071
3074
3076
3078 MRI.constrainRegClass(NewReg, TRC);
3079
3080 TRC = TII->getRegClass(MCID, 2, TRI, *MF);
3081 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3082
3088
3089 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3099 if (MI->mayLoad()) {
3100 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3108 } else {
3109 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3117 }
3118 default:
3120 }
3121}
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3140
3141
3144
3146
3147
3149 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3152 continue;
3153 }
3154
3156 if (BaseOp == -1)
3157 return false;
3158
3159 if (.getOperand(BaseOp).isReg() ||
3160 Use.getOperand(BaseOp).getReg() != Base)
3161 return false;
3163 PrePostInc = &Use;
3164 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3165 BaseAccess = &Use;
3166 else
3168 }
3169
3170 int IncrementOffset;
3172 if (BaseAccess && Increment) {
3173 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3174 return false;
3176 if (Increment->definesRegister(ARM::CPSR, nullptr) ||
3178 return false;
3179
3180 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3181 << Base.virtRegIndex() << "\n");
3182
3183
3184
3186 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3187 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3189 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3190 return false;
3191 }
3192 }
3193
3194
3199 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3200 return false;
3201 }
3202 }
3203 else if (PrePostInc) {
3204
3205
3206
3207
3208 if (Increment)
3209 return false;
3210
3211 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3212 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3214 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3215 BaseAccess = PrePostInc;
3217 }
3218 else
3219 return false;
3220
3221
3222
3223
3224
3225
3226
3227
3228
3230 int CodesizeEstimate = -1;
3231 for (auto *Use : OtherAccesses) {
3236 Use->getOperand(BaseOp + 1).getImm() -
3237 IncrementOffset,
3238 TII, CodesizeEstimate)) {
3239 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3240 return false;
3241 }
3242 } else if (!DT->dominates(Use, BaseAccess)) {
3244 dbgs() << " Unknown dominance relation between Base and Use\n");
3245 return false;
3246 }
3247 }
3248 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3249 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3250 return false;
3251 }
3252
3253 if (!PrePostInc) {
3254
3257 NewBaseReg = Increment->getOperand(0).getReg();
3262 (void)BaseAccessPost;
3264 }
3265
3266 for (auto *Use : SuccessorAccesses) {
3270 }
3271
3272
3273
3275 Op.setIsKill(false);
3276 return true;
3277}
3278
3279bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3280 bool Changed = false;
3282 for (auto &MBB : *MF) {
3285 if (BaseOp == -1 || .getOperand(BaseOp).isReg())
3286 continue;
3287
3289 if (.isVirtual())
3290 continue;
3291
3293 }
3294 }
3295
3296 for (auto Base : Visited)
3297 Changed |= DistributeIncrements(Base);
3298
3299 return Changed;
3300}
3301
3302
// Pass factory body; the signature line is not present in this listing.
//
// Returns a newly allocated ARMPreAllocLoadStoreOpt when PreAlloc is true,
// otherwise a newly allocated ARMLoadStoreOpt. The object is created with a
// plain `new`; nothing here releases it.
// NOTE(review): ownership presumably passes to the caller -- confirm.
3304 if (PreAlloc)
3305 return new ARMPreAllocLoadStoreOpt();
3306 return new ARMLoadStoreOpt();
3307}
unsigned const MachineRegisterInfo * MRI
static bool isLoadSingle(unsigned Opc)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool > > &Regs, unsigned Reg)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for an increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static int getMemoryOpOffset(const MachineInstr &MI)
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const ARMTargetLowering * getTargetLowering() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
Describe properties that are true of each instruction in the target description file.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Align getTransientStackAlign() const
getTransientStackAlign - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
void dump() const
Support for debugging, callable in GDB: V->dump()
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Define
Register definition.
@ Kill
The last use of a register.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
unsigned getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
int getAddSubImmediate(MachineInstr &MI)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.