LLVM: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Source File
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
19//===----------------------------------------------------------------------===//
45#include <cassert>
46#include <cstdint>
47#include <functional>
48#include <iterator>
49#include <limits>
50#include <optional>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "aarch64-ldst-opt"
55
56STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
57STATISTIC(NumPostFolded, "Number of post-index updates folded");
58STATISTIC(NumPreFolded, "Number of pre-index updates folded");
60 "Number of load/store from unscaled generated");
61STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
62STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
63STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
64 "that failed the alignment check");
66 "Number of const offset of index address folded");
67
69 "Controls which pairs are considered for renaming");
70
71// The LdStLimit limits how far we search for load/store pairs.
72static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
73 cl::init(20), cl::Hidden);
74
75// The UpdateLimit limits how far we search for update instructions when we
76// form pre-/post-index instructions.
77static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit",
78 cl::init(100), cl::Hidden);
79
80// The LdStConstLimit limits how far we search for const offset instructions
81// when we form index address load/store instructions.
82static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
83 cl::init(10), cl::Hidden);
84
85// Enable register renaming to find additional store pairing opportunities.
86static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
87 cl::init(true), cl::Hidden);
88
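// Editorial note (not part of the original source): these cl::opt values are
// ordinary LLVM command-line flags, so the scan limits above can be tuned when
// exercising the pass. A hypothetical invocation, assuming a MIR reproducer:
//   llc -run-pass=aarch64-ldst-opt -aarch64-load-store-scan-limit=40 test.mir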
89#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
90
91namespace {
92
93using LdStPairFlags = struct LdStPairFlags {
94 // If a matching instruction is found, MergeForward is set to true if the
95 // merge is to remove the first instruction and replace the second with
96 // a pair-wise insn, and false if the reverse is true.
97 bool MergeForward = false;
98
99 // SExtIdx gives the index of the result of the load pair that must be
100 // extended. The value of SExtIdx assumes that the paired load produces the
101 // value in this order: (I, returned iterator), i.e., -1 means no value
102 // has to be extended, 0 means I, and 1 means the returned iterator.
103 int SExtIdx = -1;
104
105 // If not none, RenameReg can be used to rename the result register of the
106 // load instruction that must be renamed.
107
108 std::optional<MCPhysReg> RenameReg;
109
110 LdStPairFlags() = default;
111
112 void setMergeForward(bool V = true) { MergeForward = V; }
113 bool getMergeForward() const { return MergeForward; }
114
115 void setSExtIdx(int V) { SExtIdx = V; }
116 int getSExtIdx() const { return SExtIdx; }
117
118 void setRenameReg(MCPhysReg R) { RenameReg = R; }
119 void clearRenameReg() { RenameReg = std::nullopt; }
120 std::optional getRenameReg() const { return RenameReg; }
121};
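// Editorial sketch (not part of the original source): how the pass plumbs this
// flags object between the search and merge members declared below.
//   LdStPairFlags Flags;
//   MachineBasicBlock::iterator Paired =
//       findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
//   if (Paired != E)
//     MBBI = mergePairedInsns(MBBI, Paired, Flags);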
122
123struct AArch64LoadStoreOpt : public MachineFunctionPass {
124 static char ID;
125
127
132
133
136
137 void getAnalysisUsage(AnalysisUsage &AU) const override {
138 AU.addRequired<AAResultsWrapperPass>();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }
141
142
143
144
145 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
146 LdStPairFlags &Flags,
147 unsigned Limit,
148 bool FindNarrowMerge);
149
150
151
154
155
159 const LdStPairFlags &Flags);
160
161
165 const LdStPairFlags &Flags);
166
167
171
172
173
174
177 int UnscaledOffset, unsigned Limit);
178
179
180
181
185
186
187
188
189
190
191
194 bool &MergeEither);
195
196
197
199 unsigned BaseReg, int Offset);
200
202 unsigned IndexReg, unsigned &Offset);
203
204
205 std::optional<MachineBasicBlock::iterator>
206 mergeUpdateInsn(MachineBasicBlock::iterator I,
207 MachineBasicBlock::iterator Update, bool IsForward,
208 bool IsPreIdx, bool MergeEither);
209
213 int Scale);
214
215
217
218
220
221
223
224
226
227
229
231
233
236 }
237
239};
240
241char AArch64LoadStoreOpt::ID = 0;
242
243} // end anonymous namespace
244
245INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
246 AARCH64_LOAD_STORE_OPT_NAME, false, false)
247
248static bool isNarrowStore(unsigned Opc) {
249 switch (Opc) {
250 default:
251 return false;
252 case AArch64::STRBBui:
253 case AArch64::STURBBi:
254 case AArch64::STRHHui:
255 case AArch64::STURHHi:
256 return true;
257 }
258}
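// e.g. `strh wzr, [x0]` (STRHHui) and `strb wzr, [x0]` (STRBBui) are narrow
// stores in this sense; a 32-bit `str wzr, [x0]` (STRWui) is not.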
259
260
261
262static bool isTagStore(const MachineInstr &MI) {
263 switch (MI.getOpcode()) {
264 default:
265 return false;
266 case AArch64::STGi:
267 case AArch64::STZGi:
268 case AArch64::ST2Gi:
269 case AArch64::STZ2Gi:
270 return true;
271 }
272}
273
274static unsigned getMatchingNonSExtOpcode(unsigned Opc,
275 bool *IsValidLdStrOpc = nullptr) {
276 if (IsValidLdStrOpc)
277 *IsValidLdStrOpc = true;
278 switch (Opc) {
279 default:
280 if (IsValidLdStrOpc)
281 *IsValidLdStrOpc = false;
282 return std::numeric_limits<unsigned>::max();
283 case AArch64::STRDui:
284 case AArch64::STURDi:
285 case AArch64::STRDpre:
286 case AArch64::STRQui:
287 case AArch64::STURQi:
288 case AArch64::STRQpre:
289 case AArch64::STRBBui:
290 case AArch64::STURBBi:
291 case AArch64::STRHHui:
292 case AArch64::STURHHi:
293 case AArch64::STRWui:
294 case AArch64::STRWpre:
295 case AArch64::STURWi:
296 case AArch64::STRXui:
297 case AArch64::STRXpre:
298 case AArch64::STURXi:
299 case AArch64::STR_ZXI:
300 case AArch64::LDRDui:
301 case AArch64::LDURDi:
302 case AArch64::LDRDpre:
303 case AArch64::LDRQui:
304 case AArch64::LDURQi:
305 case AArch64::LDRQpre:
306 case AArch64::LDRWui:
307 case AArch64::LDURWi:
308 case AArch64::LDRWpre:
309 case AArch64::LDRXui:
310 case AArch64::LDURXi:
311 case AArch64::LDRXpre:
312 case AArch64::STRSui:
313 case AArch64::STURSi:
314 case AArch64::STRSpre:
315 case AArch64::LDRSui:
316 case AArch64::LDURSi:
317 case AArch64::LDRSpre:
318 case AArch64::LDR_ZXI:
319 return Opc;
320 case AArch64::LDRSWui:
321 return AArch64::LDRWui;
322 case AArch64::LDURSWi:
323 return AArch64::LDURWi;
324 case AArch64::LDRSWpre:
325 return AArch64::LDRWpre;
326 }
327}
328
329static unsigned getMatchingWideOpcode(unsigned Opc) {
330 switch (Opc) {
331 default:
332 llvm_unreachable("Opcode has no wide equivalent!");
333 case AArch64::STRBBui:
334 return AArch64::STRHHui;
335 case AArch64::STRHHui:
336 return AArch64::STRWui;
337 case AArch64::STURBBi:
338 return AArch64::STURHHi;
339 case AArch64::STURHHi:
340 return AArch64::STURWi;
341 case AArch64::STURWi:
342 return AArch64::STURXi;
343 case AArch64::STRWui:
344 return AArch64::STRXui;
345 }
346}
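// e.g. getMatchingWideOpcode(AArch64::STRHHui) == AArch64::STRWui, so two
// adjacent 16-bit zero stores can be rewritten as one 32-bit zero store.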
347
348static unsigned getMatchingPairOpcode(unsigned Opc) {
349 switch (Opc) {
350 default:
351 llvm_unreachable("Opcode has no pairwise equivalent!");
352 case AArch64::STRSui:
353 case AArch64::STURSi:
354 return AArch64::STPSi;
355 case AArch64::STRSpre:
356 return AArch64::STPSpre;
357 case AArch64::STRDui:
358 case AArch64::STURDi:
359 return AArch64::STPDi;
360 case AArch64::STRDpre:
361 return AArch64::STPDpre;
362 case AArch64::STRQui:
363 case AArch64::STURQi:
364 case AArch64::STR_ZXI:
365 return AArch64::STPQi;
366 case AArch64::STRQpre:
367 return AArch64::STPQpre;
368 case AArch64::STRWui:
369 case AArch64::STURWi:
370 return AArch64::STPWi;
371 case AArch64::STRWpre:
372 return AArch64::STPWpre;
373 case AArch64::STRXui:
374 case AArch64::STURXi:
375 return AArch64::STPXi;
376 case AArch64::STRXpre:
377 return AArch64::STPXpre;
378 case AArch64::LDRSui:
379 case AArch64::LDURSi:
380 return AArch64::LDPSi;
381 case AArch64::LDRSpre:
382 return AArch64::LDPSpre;
383 case AArch64::LDRDui:
384 case AArch64::LDURDi:
385 return AArch64::LDPDi;
386 case AArch64::LDRDpre:
387 return AArch64::LDPDpre;
388 case AArch64::LDRQui:
389 case AArch64::LDURQi:
390 case AArch64::LDR_ZXI:
391 return AArch64::LDPQi;
392 case AArch64::LDRQpre:
393 return AArch64::LDPQpre;
394 case AArch64::LDRWui:
395 case AArch64::LDURWi:
396 return AArch64::LDPWi;
397 case AArch64::LDRWpre:
398 return AArch64::LDPWpre;
399 case AArch64::LDRXui:
400 case AArch64::LDURXi:
401 return AArch64::LDPXi;
402 case AArch64::LDRXpre:
403 return AArch64::LDPXpre;
404 case AArch64::LDRSWui:
405 case AArch64::LDURSWi:
406 return AArch64::LDPSWi;
407 case AArch64::LDRSWpre:
408 return AArch64::LDPSWpre;
409 }
410}
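// e.g. getMatchingPairOpcode(AArch64::LDRXui) == AArch64::LDPXi:
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// becomes
//   ldp x0, x1, [x2]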
411
412static unsigned isMatchingStore(MachineInstr &LoadInst,
413 MachineInstr &StoreInst) {
414 unsigned LdOpc = LoadInst.getOpcode();
415 unsigned StOpc = StoreInst.getOpcode();
416 switch (LdOpc) {
417 default:
418 llvm_unreachable("Unsupported load instruction!");
419 case AArch64::LDRBBui:
420 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
421 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
422 case AArch64::LDURBBi:
423 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
424 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
425 case AArch64::LDRHHui:
426 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
427 StOpc == AArch64::STRXui;
428 case AArch64::LDURHHi:
429 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
430 StOpc == AArch64::STURXi;
431 case AArch64::LDRWui:
432 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
433 case AArch64::LDURWi:
434 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
435 case AArch64::LDRXui:
436 return StOpc == AArch64::STRXui;
437 case AArch64::LDURXi:
438 return StOpc == AArch64::STURXi;
439 }
440}
441
442static unsigned getPreIndexedOpcode(unsigned Opc) {
443
444
445
446
447 switch (Opc) {
448 default:
449 llvm_unreachable("Opcode has no pre-indexed wise equivalent!");
450 case AArch64::STRSui:
451 return AArch64::STRSpre;
452 case AArch64::STRDui:
453 return AArch64::STRDpre;
454 case AArch64::STRQui:
455 return AArch64::STRQpre;
456 case AArch64::STRBBui:
457 return AArch64::STRBBpre;
458 case AArch64::STRHHui:
459 return AArch64::STRHHpre;
460 case AArch64::STRWui:
461 return AArch64::STRWpre;
462 case AArch64::STRXui:
463 return AArch64::STRXpre;
464 case AArch64::LDRSui:
465 return AArch64::LDRSpre;
466 case AArch64::LDRDui:
467 return AArch64::LDRDpre;
468 case AArch64::LDRQui:
469 return AArch64::LDRQpre;
470 case AArch64::LDRBBui:
471 return AArch64::LDRBBpre;
472 case AArch64::LDRHHui:
473 return AArch64::LDRHHpre;
474 case AArch64::LDRWui:
475 return AArch64::LDRWpre;
476 case AArch64::LDRXui:
477 return AArch64::LDRXpre;
478 case AArch64::LDRSWui:
479 return AArch64::LDRSWpre;
480 case AArch64::LDPSi:
481 return AArch64::LDPSpre;
482 case AArch64::LDPSWi:
483 return AArch64::LDPSWpre;
484 case AArch64::LDPDi:
485 return AArch64::LDPDpre;
486 case AArch64::LDPQi:
487 return AArch64::LDPQpre;
488 case AArch64::LDPWi:
489 return AArch64::LDPWpre;
490 case AArch64::LDPXi:
491 return AArch64::LDPXpre;
492 case AArch64::STPSi:
493 return AArch64::STPSpre;
494 case AArch64::STPDi:
495 return AArch64::STPDpre;
496 case AArch64::STPQi:
497 return AArch64::STPQpre;
498 case AArch64::STPWi:
499 return AArch64::STPWpre;
500 case AArch64::STPXi:
501 return AArch64::STPXpre;
502 case AArch64::STGi:
503 return AArch64::STGPreIndex;
504 case AArch64::STZGi:
505 return AArch64::STZGPreIndex;
506 case AArch64::ST2Gi:
507 return AArch64::ST2GPreIndex;
508 case AArch64::STZ2Gi:
509 return AArch64::STZ2GPreIndex;
510 case AArch64::STGPi:
511 return AArch64::STGPpre;
512 }
513}
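// e.g. getPreIndexedOpcode(AArch64::STRXui) == AArch64::STRXpre:
//   add x1, x1, #8
//   str x0, [x1]
// becomes
//   str x0, [x1, #8]!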
514
515static unsigned getBaseAddressOpcode(unsigned Opc) {
516
517 switch (Opc) {
518 default:
519 llvm_unreachable("Opcode has no base address equivalent!");
520 case AArch64::LDRBroX:
521 return AArch64::LDRBui;
522 case AArch64::LDRBBroX:
523 return AArch64::LDRBBui;
524 case AArch64::LDRSBXroX:
525 return AArch64::LDRSBXui;
526 case AArch64::LDRSBWroX:
527 return AArch64::LDRSBWui;
528 case AArch64::LDRHroX:
529 return AArch64::LDRHui;
530 case AArch64::LDRHHroX:
531 return AArch64::LDRHHui;
532 case AArch64::LDRSHXroX:
533 return AArch64::LDRSHXui;
534 case AArch64::LDRSHWroX:
535 return AArch64::LDRSHWui;
536 case AArch64::LDRWroX:
537 return AArch64::LDRWui;
538 case AArch64::LDRSroX:
539 return AArch64::LDRSui;
540 case AArch64::LDRSWroX:
541 return AArch64::LDRSWui;
542 case AArch64::LDRDroX:
543 return AArch64::LDRDui;
544 case AArch64::LDRXroX:
545 return AArch64::LDRXui;
546 case AArch64::LDRQroX:
547 return AArch64::LDRQui;
548 }
549}
550
551static unsigned getPostIndexedOpcode(unsigned Opc) {
552 switch (Opc) {
553 default:
554 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
555 case AArch64::STRSui:
556 case AArch64::STURSi:
557 return AArch64::STRSpost;
558 case AArch64::STRDui:
559 case AArch64::STURDi:
560 return AArch64::STRDpost;
561 case AArch64::STRQui:
562 case AArch64::STURQi:
563 return AArch64::STRQpost;
564 case AArch64::STRBBui:
565 return AArch64::STRBBpost;
566 case AArch64::STRHHui:
567 return AArch64::STRHHpost;
568 case AArch64::STRWui:
569 case AArch64::STURWi:
570 return AArch64::STRWpost;
571 case AArch64::STRXui:
572 case AArch64::STURXi:
573 return AArch64::STRXpost;
574 case AArch64::LDRSui:
575 case AArch64::LDURSi:
576 return AArch64::LDRSpost;
577 case AArch64::LDRDui:
578 case AArch64::LDURDi:
579 return AArch64::LDRDpost;
580 case AArch64::LDRQui:
581 case AArch64::LDURQi:
582 return AArch64::LDRQpost;
583 case AArch64::LDRBBui:
584 return AArch64::LDRBBpost;
585 case AArch64::LDRHHui:
586 return AArch64::LDRHHpost;
587 case AArch64::LDRWui:
588 case AArch64::LDURWi:
589 return AArch64::LDRWpost;
590 case AArch64::LDRXui:
591 case AArch64::LDURXi:
592 return AArch64::LDRXpost;
593 case AArch64::LDRSWui:
594 return AArch64::LDRSWpost;
595 case AArch64::LDPSi:
596 return AArch64::LDPSpost;
597 case AArch64::LDPSWi:
598 return AArch64::LDPSWpost;
599 case AArch64::LDPDi:
600 return AArch64::LDPDpost;
601 case AArch64::LDPQi:
602 return AArch64::LDPQpost;
603 case AArch64::LDPWi:
604 return AArch64::LDPWpost;
605 case AArch64::LDPXi:
606 return AArch64::LDPXpost;
607 case AArch64::STPSi:
608 return AArch64::STPSpost;
609 case AArch64::STPDi:
610 return AArch64::STPDpost;
611 case AArch64::STPQi:
612 return AArch64::STPQpost;
613 case AArch64::STPWi:
614 return AArch64::STPWpost;
615 case AArch64::STPXi:
616 return AArch64::STPXpost;
617 case AArch64::STGi:
618 return AArch64::STGPostIndex;
619 case AArch64::STZGi:
620 return AArch64::STZGPostIndex;
621 case AArch64::ST2Gi:
622 return AArch64::ST2GPostIndex;
623 case AArch64::STZ2Gi:
624 return AArch64::STZ2GPostIndex;
625 case AArch64::STGPi:
626 return AArch64::STGPpost;
627 }
628}
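// e.g. getPostIndexedOpcode(AArch64::LDRXui) == AArch64::LDRXpost:
//   ldr x0, [x1]
//   add x1, x1, #8
// becomes
//   ldr x0, [x1], #8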
629
630static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
631
632 unsigned OpcA = FirstMI.getOpcode();
633 unsigned OpcB = MI.getOpcode();
634
635 switch (OpcA) {
636 default:
637 return false;
638 case AArch64::STRSpre:
639 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
640 case AArch64::STRDpre:
641 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
642 case AArch64::STRQpre:
643 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
644 case AArch64::STRWpre:
645 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
646 case AArch64::STRXpre:
647 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
648 case AArch64::LDRSpre:
649 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
650 case AArch64::LDRDpre:
651 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
652 case AArch64::LDRQpre:
653 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
654 case AArch64::LDRWpre:
655 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
656 case AArch64::LDRXpre:
657 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
658 case AArch64::LDRSWpre:
659 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
660 }
661}
662
663
664static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
665 int &MinOffset, int &MaxOffset) {
666 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
667 bool IsTagStore = isTagStore(MI);
668 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
669 // as in the "unsigned offset" variant.
670 // All other pre/post indexed ldst instructions are unscaled.
671 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
672
673 if (IsPaired) {
674 MinOffset = -64;
675 MaxOffset = 63;
676 } else {
677 MinOffset = -256;
678 MaxOffset = 255;
679 }
680}
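// Worked example: for a paired LDPXi, Scale is 8 and the writeback immediate
// must fit [-64, 63] elements, i.e. byte offsets in [-512, 504] that are
// multiples of 8; for an unpaired, non-tag-store op the unscaled signed 9-bit
// range [-256, 255] applies instead.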
681
682static MachineOperand &getLdStRegOp(MachineInstr &MI,
683 unsigned PairedRegOp = 0) {
684 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
685 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
686 if (IsPreLdSt)
687 PairedRegOp += 1;
688 unsigned Idx = AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt
689 ? PairedRegOp + 1 : PairedRegOp;
690 return MI.getOperand(Idx);
691}
692
693static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
694 MachineInstr &StoreInst,
695 const AArch64InstrInfo *TII) {
696 assert(isMatchingStore(LoadInst, StoreInst) && "Expect matching store");
697 int LoadSize = TII->getMemScale(LoadInst);
698 int StoreSize = TII->getMemScale(StoreInst);
699 int UnscaledStOffset =
700 TII->hasUnscaledLdStOffset(StoreInst)
701 ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
702 : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
703 int UnscaledLdOffset =
704 TII->hasUnscaledLdStOffset(LoadInst)
705 ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
706 : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
707 return (UnscaledStOffset <= UnscaledLdOffset) &&
708 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
709}
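// Worked example (byte intervals): `str w1, [x0, #4]` covers [4, 8) and
// `ldrh w2, [x0, #6]` reads [6, 8), so the load lies entirely within the
// store and can be fed from w1 instead of reloading from memory.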
710
711static bool isPromotableZeroStoreInst(MachineInstr &MI) {
712 unsigned Opc = MI.getOpcode();
713 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
714 isNarrowStore(Opc)) &&
715 getLdStRegOp(MI).getReg() == AArch64::WZR;
716}
717
718static bool isPromotableLoadFromStore(MachineInstr &MI) {
719 switch (MI.getOpcode()) {
720 default:
721 return false;
722
723 case AArch64::LDRBBui:
724 case AArch64::LDRHHui:
725 case AArch64::LDRWui:
726 case AArch64::LDRXui:
727
728 case AArch64::LDURBBi:
729 case AArch64::LDURHHi:
730 case AArch64::LDURWi:
731 case AArch64::LDURXi:
732 return true;
733 }
734}
735
736static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
737 unsigned Opc = MI.getOpcode();
738 switch (Opc) {
739 default:
740 return false;
741
742 case AArch64::STRSui:
743 case AArch64::STRDui:
744 case AArch64::STRQui:
745 case AArch64::STRXui:
746 case AArch64::STRWui:
747 case AArch64::STRHHui:
748 case AArch64::STRBBui:
749 case AArch64::LDRSui:
750 case AArch64::LDRDui:
751 case AArch64::LDRQui:
752 case AArch64::LDRXui:
753 case AArch64::LDRWui:
754 case AArch64::LDRHHui:
755 case AArch64::LDRBBui:
756 case AArch64::STGi:
757 case AArch64::STZGi:
758 case AArch64::ST2Gi:
759 case AArch64::STZ2Gi:
760 case AArch64::STGPi:
761
762 case AArch64::STURSi:
763 case AArch64::STURDi:
764 case AArch64::STURQi:
765 case AArch64::STURWi:
766 case AArch64::STURXi:
767 case AArch64::LDURSi:
768 case AArch64::LDURDi:
769 case AArch64::LDURQi:
770 case AArch64::LDURWi:
771 case AArch64::LDURXi:
772
773 case AArch64::LDPSi:
774 case AArch64::LDPSWi:
775 case AArch64::LDPDi:
776 case AArch64::LDPQi:
777 case AArch64::LDPWi:
778 case AArch64::LDPXi:
779 case AArch64::STPSi:
780 case AArch64::STPDi:
781 case AArch64::STPQi:
782 case AArch64::STPWi:
783 case AArch64::STPXi:
784 // Make sure this is a reg+imm (as opposed to an address reloc).
785 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
786 return false;
787
788 // When using stack tagging, simple sp+imm loads and stores are not
789 // tag-checked, but pre- and post-indexed versions of them are, so we
790 // cannot replace the former with the latter. This transformation would be
791 // valid if the load/store accesses an untagged stack slot, but we don't
792 // have that information available after frame indices have been
793 // eliminated.
794 if (AFI.isMTETagged())
795 return false;
796
797 return true;
798 }
799}
800
801
802static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
803 unsigned Opc = MI.getOpcode();
804 switch (Opc) {
805 default:
806 return false;
807
808
809 case AArch64::LDRBroX:
810 case AArch64::LDRBBroX:
811 case AArch64::LDRSBXroX:
812 case AArch64::LDRSBWroX:
813 Scale = 1;
814 return true;
815 case AArch64::LDRHroX:
816 case AArch64::LDRHHroX:
817 case AArch64::LDRSHXroX:
818 case AArch64::LDRSHWroX:
819 Scale = 2;
820 return true;
821 case AArch64::LDRWroX:
822 case AArch64::LDRSroX:
823 case AArch64::LDRSWroX:
824 Scale = 4;
825 return true;
826 case AArch64::LDRDroX:
827 case AArch64::LDRXroX:
828 Scale = 8;
829 return true;
830 case AArch64::LDRQroX:
831 Scale = 16;
832 return true;
833 }
834}
835
836static bool isRewritableImplicitDef(unsigned Opc) {
837 switch (Opc) {
838 default:
839 return false;
840 case AArch64::ORRWrs:
841 case AArch64::ADDWri:
842 return true;
843 }
844}
845
846MachineBasicBlock::iterator
847AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
848 MachineBasicBlock::iterator MergeMI,
849 const LdStPairFlags &Flags) {
850 assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
851 "Expected promotable zero stores.");
852
853 MachineBasicBlock::iterator E = I->getParent()->end();
854 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
855
856 // If NextI is the second of the two instructions to be merged, we need
857 // to skip one further. Either way we merge will invalidate the iterator,
858 // and we don't need to scan the new instruction, as it's a pairwise
859 // instruction, which we're not considering for further action anyway.
860 if (NextI == MergeMI)
861 NextI = next_nodbg(NextI, E);
862 unsigned Opc = I->getOpcode();
863 unsigned MergeMIOpc = MergeMI->getOpcode();
864 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
865 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
866 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
867 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
868
869 bool MergeForward = Flags.getMergeForward();
870
871
873
874
875 const MachineOperand &BaseRegOp =
876 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
877 : AArch64InstrInfo::getLdStBaseOp(*I);
878
879
880 int64_t IOffsetInBytes =
881 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
882 int64_t MIOffsetInBytes =
883 AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
884 MergeMIOffsetStride;
885
886 int64_t OffsetImm;
887 if (IOffsetInBytes > MIOffsetInBytes)
888 OffsetImm = MIOffsetInBytes;
889 else
890 OffsetImm = IOffsetInBytes;
891
892 unsigned NewOpcode = getMatchingWideOpcode(Opc);
893
894
895 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
896 int NewOffsetStride = TII->getMemScale(NewOpcode);
897 assert(((OffsetImm % NewOffsetStride) == 0) &&
898 "Offset should be a multiple of the store memory scale");
899 OffsetImm = OffsetImm / NewOffsetStride;
900 }
901
902
903 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
904 DebugLoc DL = I->getDebugLoc();
905 MachineInstrBuilder MIB;
906 MIB = BuildMI(*I->getParent(), InsertionPoint, DL, TII->get(NewOpcode))
907 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
908 .add(BaseRegOp)
909 .addImm(OffsetImm)
910 .cloneMergedMemRefs({&*I, &*MergeMI})
911 .setMIFlags(I->mergeFlagsWith(*MergeMI));
912 (void)MIB;
913
914 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
921
922
923 I->eraseFromParent();
924 MergeMI->eraseFromParent();
925 return NextI;
926}
927
928
929
930
931static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
932 const TargetRegisterInfo *TRI, unsigned Limit,
933 std::function<bool(MachineInstr &, bool)> &Fn) {
934 auto MBB = MI.getParent();
935 for (MachineInstr &I : instructionsWithoutDebug(
936 std::next(MI.getIterator()), MBB->instr_end())) {
937 if (!Limit)
938 return false;
939 --Limit;
940
941 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
942 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
943 TRI->regsOverlap(MOP.getReg(), DefReg);
944 });
945 if (!Fn(I, isDef))
946 return false;
947 if (isDef)
948 break;
949 }
950 return true;
951}
952
953static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
954 const TargetRegisterInfo *TRI) {
955 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
957 if (MOP.isReg() && MOP.isKill())
958 Units.removeReg(MOP.getReg());
960 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
961 if (MOP.isReg() && !MOP.isKill())
962 Units.addReg(MOP.getReg());
963}
964
965
966// This function will add a new entry into the debugValueSubstitutions table
967// when two instructions have been merged into a new one.
968static void addDebugSubstitutionsToTable(MachineFunction *MF,
969 unsigned InstrNumToSet,
970 MachineInstr &OriginalInstr,
971 MachineInstr &MergedInstr) {
972
973
974
976 unsigned OperandNo = 0;
977 bool RegFound = false;
978 for (const auto Op : MergedInstr.operands()) {
979 if (Op.isReg() && Op.getReg() == OriginalInstr.getOperand(0).getReg()) {
980 RegFound = true;
981 break;
982 }
983 OperandNo++;
984 }
985
986 if (RegFound)
987 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0u},
988 {InstrNumToSet, OperandNo});
989}
990
991MachineBasicBlock::iterator
992AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
993 MachineBasicBlock::iterator Paired,
994 const LdStPairFlags &Flags) {
997
998
999
1000
1001 if (NextI == Paired)
1002 NextI = next_nodbg(NextI, E);
1003
1004 int SExtIdx = Flags.getSExtIdx();
1005 unsigned Opc =
1006 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1007 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1008 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1009
1010 bool MergeForward = Flags.getMergeForward();
1011
1012 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1013 if (RenameReg) {
1014 MCPhysReg RegToRename = getLdStRegOp(*I).getReg();
1015 DefinedInBB.addReg(*RenameReg);
1016
1017
1018
1019 auto GetMatchingSubReg =
1020 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1021 for (MCPhysReg SubOrSuper :
1022 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1023 if (C->contains(SubOrSuper))
1024 return SubOrSuper;
1025 }
1026 llvm_unreachable("Should have found matching sub or super register!");
1027 };
1028
1029 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1030 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1031 bool IsDef) {
1032 if (IsDef) {
1033 bool SeenDef = false;
1034 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1035 MachineOperand &MOP = MI.getOperand(OpIdx);
1036
1037
1039 (!MergeForward || !SeenDef ||
1041 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1044 "Need renamable operands");
1046 if (const TargetRegisterClass *RC =
1048 MatchingReg = GetMatchingSubReg(RC);
1049 else {
1051 continue;
1052 MatchingReg = GetMatchingSubReg(
1053 TRI->getMinimalPhysRegClass(MOP.getReg()));
1054 }
1055 MOP.setReg(MatchingReg);
1056 SeenDef = true;
1057 }
1058 }
1059 } else {
1060 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1061 MachineOperand &MOP = MI.getOperand(OpIdx);
1063 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1066 "Need renamable operands");
1068 if (const TargetRegisterClass *RC =
1070 MatchingReg = GetMatchingSubReg(RC);
1071 else
1072 MatchingReg = GetMatchingSubReg(
1073 TRI->getMinimalPhysRegClass(MOP.getReg()));
1074 assert(MatchingReg != AArch64::NoRegister &&
1075 "Cannot find matching regs for renaming");
1076 MOP.setReg(MatchingReg);
1077 }
1078 }
1079 }
1081 return true;
1082 };
1083 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1084 TRI, UINT32_MAX, UpdateMIs);
1085
1086#if !defined(NDEBUG)
1087
1088
1089
1090
1091 MCPhysReg RegToCheck = *RenameReg;
1092
1093
1094
1095
1096 if (!MergeForward)
1097 RegToCheck = RegToRename;
1098 for (auto &MI :
1099 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1100 MergeForward ? std::next(I) : I,
1101 MergeForward ? std::next(Paired) : Paired))
1102 assert(all_of(MI.operands(),
1103 [this, RegToCheck](const MachineOperand &MOP) {
1104 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1105 MOP.isUndef() ||
1106 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1107 }) &&
1108 "Rename register used between paired instruction, trashing the "
1109 "content");
1110#endif
1111 }
1112
1113
1114
1116
1117
1118 const MachineOperand &BaseRegOp =
1119 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1120 : AArch64InstrInfo::getLdStBaseOp(*I);
1121
1122 int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
1123 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1124 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1125 if (IsUnscaled != PairedIsUnscaled) {
1126
1127
1128
1129 int MemSize = TII->getMemScale(*Paired);
1130 if (PairedIsUnscaled) {
1131
1132
1133 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1134 "Offset should be a multiple of the stride!");
1135 PairedOffset /= MemSize;
1136 } else {
1137 PairedOffset *= MemSize;
1138 }
1139 }
1140
1141
1142
1143
1144 MachineInstr *RtMI, *Rt2MI;
1145 if (Offset == PairedOffset + OffsetStride &&
1146 !AArch64InstrInfo::isPreLdSt(*Paired)) {
1147 RtMI = &*Paired;
1148 Rt2MI = &*I;
1149
1150
1151
1152 if (SExtIdx != -1)
1153 SExtIdx = (SExtIdx + 1) % 2;
1154 } else {
1155 RtMI = &*I;
1156 Rt2MI = &*Paired;
1157 }
1158 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1159
1160 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1161 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1162 "Unscaled offset cannot be scaled.");
1163 OffsetImm /= TII->getMemScale(*RtMI);
1164 }
1165
1166
1167 MachineInstrBuilder MIB;
1168 DebugLoc DL = I->getDebugLoc();
1169 MachineBasicBlock *MBB = I->getParent();
1170 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1171 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1172 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1173
1174 if (RegOp0.isUse()) {
1175 if (!MergeForward) {
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1187 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1188 PairedRegOp.setIsKill(false);
1189 } else {
1190
1191
1192
1193
1194 Register Reg = getLdStRegOp(*I).getReg();
1195 for (MachineInstr &MI :
1196 make_range(std::next(I->getIterator()), Paired->getIterator()))
1197 MI.clearRegisterKills(Reg, TRI);
1198 }
1199 }
1200
1203
1204
1205 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1206 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)));
1207
1208 MIB.add(RegOp0)
1209 .add(RegOp1)
1210 .add(BaseRegOp)
1211 .addImm(OffsetImm)
1212 .cloneMergedMemRefs({&*I, &*Paired})
1213 .setMIFlags(I->mergeFlagsWith(*Paired));
1214
1215 (void)MIB;
1216
1217 LLVM_DEBUG(
1218 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1223 if (SExtIdx != -1) {
1224
1225
1226
1227
1228 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1229
1230
1231 Register DstRegX = DstMO.getReg();
1232
1233 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1234
1235 DstMO.setReg(DstRegW);
1238
1239
1240
1241
1242 MachineInstrBuilder MIBKill =
1243 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1244 .addReg(DstRegW)
1245 .addReg(DstRegX, RegState::Define);
1246 (void)MIBKill;
1247
1248 MachineInstrBuilder MIBSXTW =
1249 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1250 .addReg(DstRegX)
1251 .addImm(0)
1252 .addImm(31);
1253 (void)MIBSXTW;
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289 if (I->peekDebugInstrNum()) {
1290
1291
1292
1293
1294
1295
1296
1297 unsigned NewInstrNum;
1298 if (DstRegX == I->getOperand(0).getReg()) {
1301 *MIBSXTW);
1302 } else {
1305 }
1306 }
1307 if (Paired->peekDebugInstrNum()) {
1308
1309
1310
1311
1312
1313
1314
1315 unsigned NewInstrNum;
1316 if (DstRegX == Paired->getOperand(0).getReg()) {
1319 *MIBSXTW);
1320 } else {
1323 *MIB);
1324 }
1325 }
1326
1329 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1330
1331
1332 MachineOperand &MOp0 = MIB->getOperand(0);
1333 MachineOperand &MOp1 = MIB->getOperand(1);
1334 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1335 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1336 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1337 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1339 } else {
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368 if (I->peekDebugInstrNum()) {
1371 *MIB);
1372 }
1373 if (Paired->peekDebugInstrNum()) {
1376 *MIB);
1377 }
1378
1380 }
1382
1383 if (MergeForward)
1387
1388
1389
1390
1391 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1392 MachineBasicBlock::iterator MI2) {
1393 SmallSetVector<Register, 4> Ops;
1394 for (const MachineOperand &MO :
1395 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1396 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1397 Ops.insert(MO.getReg());
1398 for (const MachineOperand &MO :
1399 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1400 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1401 Ops.insert(MO.getReg());
1402 for (Register Reg : Ops)
1403 MIB.addReg(Reg, RegState::ImplicitDefine);
1404 };
1405 CopyImplicitOps(I, Paired);
1406
1407
1408 I->eraseFromParent();
1409 Paired->eraseFromParent();
1410
1411 return NextI;
1412}
1413
1414MachineBasicBlock::iterator
1415AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1416 MachineBasicBlock::iterator StoreI) {
1417 MachineBasicBlock::iterator NextI =
1418 next_nodbg(LoadI, LoadI->getParent()->end());
1419
1420 int LoadSize = TII->getMemScale(*LoadI);
1421 int StoreSize = TII->getMemScale(*StoreI);
1422 Register LdRt = getLdStRegOp(*LoadI).getReg();
1423 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1424 Register StRt = getLdStRegOp(*StoreI).getReg();
1425 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1426
1427 assert((IsStoreXReg ||
1428 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1429 "Unexpected RegClass");
1430
1431 MachineInstr *BitExtMI;
1432 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1433
1434
1435 if (StRt == LdRt && LoadSize == 8) {
1436 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1437 LoadI->getIterator())) {
1438 if (MI.killsRegister(StRt, TRI)) {
1439 MI.clearRegisterKills(StRt, TRI);
1440 break;
1441 }
1442 }
1443 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1446 LoadI->eraseFromParent();
1447 return NextI;
1448 }
1449
1450 BitExtMI =
1451 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1452 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1453 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1454 .add(StMO)
1455 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1456 .setMIFlags(LoadI->getFlags());
1457 } else {
1458 // FIXME: Currently we disable this transformation in big-endian targets
1459 // as performance and correctness are verified only in little-endian.
1460 if (!Subtarget->isLittleEndian())
1461 return NextI;
1462 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1463 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1464 "Unsupported ld/st match");
1465 assert(LoadSize <= StoreSize && "Invalid load size");
1466 int UnscaledLdOffset =
1467 IsUnscaled
1468 ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
1469 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1470 int UnscaledStOffset =
1471 IsUnscaled
1472 ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
1473 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1474 int Width = LoadSize * 8;
1475 Register DestReg =
1476 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1477 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1478 : LdRt;
1479
1480 assert((UnscaledLdOffset >= UnscaledStOffset &&
1481 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1482 "Invalid offset");
1483
1484 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1485 int Imms = Immr + Width - 1;
1486 if (UnscaledLdOffset == UnscaledStOffset) {
1487 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1488 | ((Immr) << 6) // immr
1489 | ((Imms) << 0) // imms
1490 ;
1491
1492 BitExtMI =
1493 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1494 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1495 DestReg)
1496 .add(StMO)
1497 .addImm(AndMaskEncoded)
1498 .setMIFlags(LoadI->getFlags());
1499 } else if (IsStoreXReg && Imms == 31) {
1500
1501
1502 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1503 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1504 TII->get(AArch64::UBFMWri),
1505 TRI->getSubReg(DestReg, AArch64::sub_32))
1506 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1507 .addImm(Immr)
1508 .addImm(Imms)
1509 .setMIFlags(LoadI->getFlags());
1510 } else {
1511 BitExtMI =
1512 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1513 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1514 DestReg)
1515 .add(StMO)
1516 .addImm(Immr)
1517 .addImm(Imms)
1518 .setMIFlags(LoadI->getFlags());
1519 }
1520 }
1521
1522
1523 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1524 BitExtMI->getIterator()))
1525 if (MI.killsRegister(StRt, TRI)) {
1526 MI.clearRegisterKills(StRt, TRI);
1527 break;
1528 }
1529
1530 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1539
1540
1541 LoadI->eraseFromParent();
1542 return NextI;
1543}
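// Illustrative result (editorial, not part of the original source): promoting
// a 16-bit load out of a covering 32-bit store with a 2-byte offset delta:
//   str w1, [x0, #4]
//   ldrh w2, [x0, #6]
// becomes
//   str w1, [x0, #4]
//   ubfx w2, w1, #16, #16   ; UBFMWri with Immr = 16, Imms = 31 (aka lsr #16)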
1544
1545static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1546 // Convert the byte-offset used by unscaled into an "element" offset used
1547 // by the scaled pair load/store instructions.
1548 if (IsUnscaled) {
1549 // If the byte-offset isn't a multiple of the stride, there's no point
1550 // trying to match it.
1551 if (Offset % OffsetStride)
1552 return false;
1553 Offset /= OffsetStride;
1554 }
1555 return Offset <= 63 && Offset >= -64;
1556}
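// e.g. an unscaled 8-byte LDURXi at byte offset 24 has element offset 3,
// which fits the signed 7-bit [-64, 63] immediate of the LDP encoding; byte
// offset 26 is rejected because it is not a multiple of the stride.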
1557
1558
1559
1560
1561
1562static int alignTo(int Num, int PowOf2) {
1563 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1564}
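// e.g. alignTo(5, 4) == 8, alignTo(8, 4) == 8, alignTo(-3, 4) == 0.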
1565
1566static bool mayAlias(MachineInstr &MIa,
1567 SmallVectorImpl<MachineInstr *> &MemInsns,
1568 AliasAnalysis *AA) {
1569 for (MachineInstr *MIb : MemInsns) {
1570 if (MIa.mayAlias(AA, *MIb, false)) {
1571 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1572 return true;
1573 }
1574 }
1575
1577 return false;
1578}
1579
1580bool AArch64LoadStoreOpt::findMatchingStore(
1581 MachineBasicBlock::iterator I, unsigned Limit,
1582 MachineBasicBlock::iterator &StoreI) {
1583 MachineBasicBlock::iterator B = I->getParent()->begin();
1584 MachineBasicBlock::iterator MBBI = I;
1585 MachineInstr &LoadMI = *I;
1586 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
1587
1588
1589
1590 if (MBBI == B)
1591 return false;
1592
1593
1594
1595 ModifiedRegUnits.clear();
1596 UsedRegUnits.clear();
1597
1598 unsigned Count = 0;
1599 do {
1600 MBBI = prev_nodbg(MBBI, B);
1601 MachineInstr &MI = *MBBI;
1602
1603
1604
1605 if (!MI.isTransient())
1606 ++Count;
1607
1608
1609
1610
1611
1612
1613
1614 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1615 BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
1616 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1617 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1618 StoreI = MBBI;
1620 return true;
1621 }
1622
1623 if (MI.isCall())
1624 return false;
1625
1626
1627 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1628
1629
1630
1631 if (!ModifiedRegUnits.available(BaseReg))
1632 return false;
1633
1634
1635 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, false))
1636 return false;
1637 } while (MBBI != B && Count < Limit);
1638 return false;
1639}
1640
1641static bool needsWinCFI(const MachineFunction *MF) {
1642 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1643 MF->getFunction().needsUnwindTableEntry();
1644}
1645
1646// Returns true if these two opcodes can be merged or paired. Otherwise,
1647// returns false.
1648static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1649 LdStPairFlags &Flags,
1650 const AArch64InstrInfo *TII) {
1651
1652 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1653 return false;
1654
1655
1656 assert(!FirstMI.hasOrderedMemoryRef() &&
1657 !TII->isLdStPairSuppressed(FirstMI) &&
1658 "FirstMI shouldn't get here if either of these checks are true.");
1659
1660 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1661 MI.getFlag(MachineInstr::FrameDestroy)))
1662 return false;
1663
1664 unsigned OpcA = FirstMI.getOpcode();
1665 unsigned OpcB = MI.getOpcode();
1666
1667
1668 if (OpcA == OpcB)
1669 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1670
1671
1672
1673 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1674 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1675 return false;
1676
1677
1678 if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
1679 return false;
1680
1681
1682 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1683 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1684 assert(IsValidLdStrOpc &&
1685 "Given Opc should be a Load or Store with an immediate");
1686
1687 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB)) {
1688 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1689 return true;
1690 }
1691
1692
1693
1694 if (!PairIsValidLdStrOpc)
1695 return false;
1696
1697
1698
1699 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1702 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1703
1704
1705
1706
1707 if (isPreLdStPairCandidate(FirstMI, MI))
1708 return true;
1709
1710
1711 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1712 getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1713
1714
1715}
1716
1717static bool canRenameMOP(const MachineOperand &MOP,
1718 const TargetRegisterInfo *TRI) {
1719 if (MOP.isReg()) {
1720 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1721
1722
1723
1724
1725
1726
1727 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1728 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1729 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1730 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1733 << " Cannot rename operands with multiple disjunct subregisters ("
1734 << MOP << ")\n");
1735 return false;
1736 }
1737
1738
1739
1740
1741 if (MOP.isImplicit() && MOP.isDef()) {
1742 if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
1743 return false;
1744 return TRI->isSuperOrSubRegisterEq(
1745 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1746 }
1747 }
1748 return MOP.isImplicit() ||
1749 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1750}
1751
1752static bool
1753canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1754 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1755 const TargetRegisterInfo *TRI) {
1756 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1757 return false;
1758
1759
1760
1761
1762 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1763
1764 // For now, we only rename if the store operand gets killed at the store.
1765 if (!getLdStRegOp(FirstMI).isKill() &&
1766 !any_of(FirstMI.operands(), [TRI, RegToRename](const MachineOperand &MOP) {
1767 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1768 MOP.isImplicit() && MOP.isKill() &&
1769 TRI->regsOverlap(RegToRename, MOP.getReg());
1770 })) {
1771 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1772 return false;
1773 }
1774
1775 bool FoundDef = false;
1776
1777
1778
1779
1780
1781 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1782 bool IsDef) {
1783 LLVM_DEBUG(dbgs() << "Checking " << MI);
1784
1785 if (MI.getFlag(MachineInstr::FrameSetup)) {
1786 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1787 << "currently\n");
1788 return false;
1789 }
1790
1791 UsedInBetween.accumulate(MI);
1792
1793
1794
1795 FoundDef = IsDef;
1796
1797
1798 if (FoundDef) {
1799
1800
1801
1802
1803
1804
1805
1806 if (MI.isPseudo()) {
1807 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1808 return false;
1809 }
1810
1811 for (auto &MOP : MI.operands()) {
1812 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1813 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1814 continue;
1815 if (!canRenameMOP(MOP, TRI)) {
1816 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1817 return false;
1818 }
1819 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1820 }
1821 return true;
1822 } else {
1823 for (auto &MOP : MI.operands()) {
1824 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1825 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1826 continue;
1827
1828 if (!canRenameMOP(MOP, TRI)) {
1829 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1830 return false;
1831 }
1832 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1833 }
1834 }
1835 return true;
1836 };
1837
1838 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1839 return false;
1840
1841 if (!FoundDef) {
1842 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1843 return false;
1844 }
1845 return true;
1846}
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857static bool canRenameUntilSecondLoad(
1858 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1859 LiveRegUnits &UsedInBetween,
1860 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1861 const TargetRegisterInfo *TRI) {
1862 if (FirstLoad.isPseudo())
1863 return false;
1864
1865 UsedInBetween.accumulate(FirstLoad);
1866 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
1867 bool Success = std::all_of(
1868 FirstLoad.getIterator(), SecondLoad.getIterator(),
1869 [&](MachineInstr &MI) {
1870 LLVM_DEBUG(dbgs() << "Checking " << MI);
1871
1872 if (MI.getFlag(MachineInstr::FrameSetup)) {
1873 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1874 << "currently\n");
1875 return false;
1876 }
1877
1878 for (auto &MOP : MI.operands()) {
1879 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1880 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1881 continue;
1882 if (!canRenameMOP(MOP, TRI)) {
1883 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1884 return false;
1885 }
1886 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1887 }
1888
1889 return true;
1890 });
1891 return Success;
1892}
1893
1894
1895
1896
1897
1898
1899
1900
1901static std::optional<MCPhysReg> tryToFindRegisterToRename(
1902 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1903 LiveRegUnits &UsedInBetween,
1904 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1905 const TargetRegisterInfo *TRI) {
1906 const MachineRegisterInfo &RegInfo = MF.getRegInfo();
1907
1908
1909 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1910 return any_of(TRI->sub_and_superregs_inclusive(PR),
1911 [&MF, TRI](MCPhysReg SubOrSuper) {
1912 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1913 });
1914 };
1915
1916
1917
1918 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1919 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1920 return any_of(
1921 TRI->sub_and_superregs_inclusive(PR),
1922 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1923 });
1924 };
1925
1926 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1927 for (const MCPhysReg &PR : *RegClass) {
1928 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1929 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1930 CanBeUsedForAllClasses(PR)) {
1931 DefinedInBB.addReg(PR);
1932 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1933 << "\n");
1934 return {PR};
1935 }
1936 }
1937 LLVM_DEBUG(dbgs() << "No rename register found from "
1938 << TRI->getRegClassName(RegClass) << "\n");
1939 return std::nullopt;
1940}
1941
1942
1943
1944
1945static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1946 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1947 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1948 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1949 const TargetRegisterInfo *TRI) {
1950 std::optional<MCPhysReg> RenameReg;
1951 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1952 return RenameReg;
1953
1954 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1955 MachineFunction &MF = *FirstMI.getParent()->getParent();
1956 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1957 return RenameReg;
1958
1959 const bool IsLoad = FirstMI.mayLoad();
1960
1961 if (!MaybeCanRename) {
1962 if (IsLoad)
1963 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1964 RequiredClasses, TRI)};
1965 else
1966 MaybeCanRename = {
1967 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1969
1970 if (*MaybeCanRename) {
1971 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1972 RequiredClasses, TRI);
1973 }
1974 return RenameReg;
1975}
1976
1977
1978
1979MachineBasicBlock::iterator
1980AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1981 LdStPairFlags &Flags, unsigned Limit,
1982 bool FindNarrowMerge) {
1983 MachineBasicBlock::iterator E = I->getParent()->end();
1984 MachineBasicBlock::iterator MBBI = I;
1985 MachineBasicBlock::iterator MBBIWithRenameReg;
1986 MachineInstr &FirstMI = *I;
1987 MBBI = next_nodbg(MBBI, E);
1988
1989 bool MayLoad = FirstMI.mayLoad();
1990 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1991 Register Reg = getLdStRegOp(FirstMI).getReg();
1992 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1993 int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
1994 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1995 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1996
1997 std::optional<bool> MaybeCanRename;
1998 if (!EnableRenaming)
1999 MaybeCanRename = {false};
2000
2001 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2002 LiveRegUnits UsedInBetween;
2003 UsedInBetween.init(*TRI);
2004
2005 Flags.clearRenameReg();
2006
2007
2008
2009 ModifiedRegUnits.clear();
2010 UsedRegUnits.clear();
2011
2012
2013 SmallVector<MachineInstr *, 4> MemInsns;
2014
2015 for (unsigned Count = 0; MBBI != E && Count < Limit;
2016 MBBI = next_nodbg(MBBI, E)) {
2017
2018 MachineInstr &MI = *MBBI;
2020
2022
2023
2024
2025 if (!MI.isTransient())
2026 ++Count;
2027
2028 Flags.setSExtIdx(-1);
2029 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2030 AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
2031 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2032
2033
2034
2035
2036
2037
2038 Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
2039 int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2040 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2041 if (IsUnscaled != MIIsUnscaled) {
2042
2043
2044
2045 int MemSize = TII->getMemScale(MI);
2046 if (MIIsUnscaled) {
2047
2048
2049 if (MIOffset % MemSize) {
2050 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2051 UsedRegUnits, TRI);
2052 MemInsns.push_back(&MI);
2053 continue;
2054 }
2055 MIOffset /= MemSize;
2056 } else {
2057 MIOffset *= MemSize;
2058 }
2059 }
2060
2062
2063 if (BaseReg == MIBaseReg) {
2064
2065
2066
2067
2068 if (IsPreLdSt) {
2069 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2070 bool IsBaseRegUsed = !UsedRegUnits.available(
2072 bool IsBaseRegModified = !ModifiedRegUnits.available(
2074
2075
2076
2077 bool IsMIRegTheSame =
2080 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2081 IsMIRegTheSame) {
2082 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2083 UsedRegUnits, TRI);
2084 MemInsns.push_back(&MI);
2085 continue;
2086 }
2087 } else {
2088 if ((Offset != MIOffset + OffsetStride) &&
2089 (Offset + OffsetStride != MIOffset)) {
2090 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2091 UsedRegUnits, TRI);
2092 MemInsns.push_back(&MI);
2093 continue;
2094 }
2095 }
2096
2097 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2098 if (FindNarrowMerge) {
2099
2100
2101
2102
2103 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2104 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2105 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2106 UsedRegUnits, TRI);
2107 MemInsns.push_back(&MI);
2108 continue;
2109 }
2110 } else {
2111
2112
2113
2114
2115 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2116 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2117 UsedRegUnits, TRI);
2118 MemInsns.push_back(&MI);
2119 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2120 << "keep looking.\n");
2121 continue;
2122 }
2123
2124
2125
2126 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2127 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2128 UsedRegUnits, TRI);
2129 MemInsns.push_back(&MI);
2130 LLVM_DEBUG(dbgs()
2131 << "Offset doesn't fit due to alignment requirements, "
2132 << "keep looking.\n");
2133 continue;
2134 }
2135 }
2136
2137
2138
2139
2140
2141
2142
2143 if (!ModifiedRegUnits.available(BaseReg))
2144 return E;
2145
2146 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2147 Reg, getLdStRegOp(MI).getReg());
2148
2149
2150
2151
2152
2153 bool RtNotModified =
2154 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2155 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2156 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2157
2158 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2159 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2160 << (RtNotModified ? "true" : "false") << "\n"
2161 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2162 << (RtNotUsed ? "true" : "false") << "\n");
2163
2164 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2165
2166
2167
2168 if (SameLoadReg) {
2169 std::optional<MCPhysReg> RenameReg =
2170 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2171 Reg, DefinedInBB, UsedInBetween,
2172 RequiredClasses, TRI);
2173 if (!RenameReg) {
2174 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2175 UsedRegUnits, TRI);
2176 MemInsns.push_back(&MI);
2177 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2178 << "keep looking.\n");
2179 continue;
2180 }
2181 Flags.setRenameReg(*RenameReg);
2182 }
2183
2184 Flags.setMergeForward(false);
2185 if (!SameLoadReg)
2186 Flags.clearRenameReg();
2187 return MBBI;
2188 }
2189
2190
2191
2192
2193
2194 RtNotModified = !(
2195 MI.mayLoad() && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2196
2197 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2198 << "Reg '" << getLdStRegOp(FirstMI)
2199 << "' not modified: "
2200 << (RtNotModified ? "true" : "false") << "\n");
2201
2202 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2204 Flags.setMergeForward(true);
2205 Flags.clearRenameReg();
2206 return MBBI;
2207 }
2208
2209 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2210 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2211 RequiredClasses, TRI);
2212 if (RenameReg) {
2213 Flags.setMergeForward(true);
2214 Flags.setRenameReg(*RenameReg);
2215 MBBIWithRenameReg = MBBI;
2216 }
2217 }
2218 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2219 << "interference in between, keep looking.\n");
2220 }
2221 }
2222
2223 if (Flags.getRenameReg())
2224 return MBBIWithRenameReg;
2225
2226
2227
2228 if (MI.isCall()) {
2229 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2230 return E;
2231 }
2232
2233
2234 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2235
2236
2237
2238 if (!ModifiedRegUnits.available(BaseReg)) {
2239 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2240 return E;
2241 }
2242
2243
2244 if (MI.mayLoadOrStore())
2245 MemInsns.push_back(&MI);
2246 }
2247 return E;
2248}
2249
2250static MachineBasicBlock::iterator
2251maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
2252 assert((MI.getOpcode() == AArch64::SUBXri ||
2253 MI.getOpcode() == AArch64::ADDXri) &&
2254 "Expected a register update instruction");
2255 auto End = MI.getParent()->end();
2256 if (MaybeCFI == End ||
2257 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2260 MI.getOperand(0).getReg() != AArch64::SP)
2261 return End;
2262
2263 const MachineFunction &MF = *MI.getParent()->getParent();
2264 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2265 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2266 switch (CFI.getOperation()) {
2267 case MCCFIInstruction::OpDefCfa:
2268 case MCCFIInstruction::OpDefCfaOffset:
2269 return MaybeCFI;
2270 default:
2271 return End;
2272 }
2273}
2274
2275std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2276 MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
2277 bool IsForward, bool IsPreIdx, bool MergeEither) {
2278 assert((Update->getOpcode() == AArch64::ADDXri ||
2279 Update->getOpcode() == AArch64::SUBXri) &&
2280 "Unexpected base register update instruction to merge!");
2281 MachineBasicBlock::iterator E = I->getParent()->end();
2282 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2283
2284 // If updating the SP and the following instruction is CFA offset related
2285 // CFI instruction move it after the merged instruction.
2286 MachineBasicBlock::iterator CFI =
2287 IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
2288 MachineBasicBlock::iterator InsertPt = I;
2289 if (IsForward) {
2292 if (MergeEither) {
2293 InsertPt = Update;
2294 } else {
2295
2296 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2297 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2298 }))
2299 return std::nullopt;
2300
2301 MachineBasicBlock *MBB = InsertPt->getParent();
2303 }
2304 }
2305 }
2306
2307
2308
2309
2310 if (NextI == Update)
2311 NextI = next_nodbg(NextI, E);
2312
2313 int Value = Update->getOperand(2).getImm();
2315 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2316 if (Update->getOpcode() == AArch64::SUBXri)
2317 Value = -Value;
2318
2319 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2320 : getPostIndexedOpcode(I->getOpcode());
2321 MachineInstrBuilder MIB;
2322 int Scale, MinOffset, MaxOffset;
2323 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2324 if (!AArch64InstrInfo::isPairedLdSt(*I)) {
2325 // Non-paired instruction.
2326 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2327 TII->get(NewOpc))
2328 .add(Update->getOperand(0))
2329 .add(getLdStRegOp(*I))
2330 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2331 .addImm(Value / Scale)
2332 .setMemRefs(I->memoperands())
2333 .setMIFlags(I->mergeFlagsWith(*Update));
2334 } else {
2335 // Paired instruction.
2336 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2337 TII->get(NewOpc))
2338 .add(Update->getOperand(0))
2339 .add(getLdStRegOp(*I, 0))
2340 .add(getLdStRegOp(*I, 1))
2341 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2342 .addImm(Value / Scale)
2343 .setMemRefs(I->memoperands())
2344 .setMIFlags(I->mergeFlagsWith(*Update));
2345 }
2346
2347 if (IsPreIdx) {
2348 ++NumPreFolded;
2349 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2350 } else {
2351 ++NumPostFolded;
2352 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2353 }
2354 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2361
2362
2363 I->eraseFromParent();
2364 Update->eraseFromParent();
2365
2366 return NextI;
2367}
2368
2369MachineBasicBlock::iterator
2370AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2371 MachineBasicBlock::iterator Update,
2372 unsigned Offset, int Scale) {
2373 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2374 "Unexpected const mov instruction to merge!");
2375 MachineBasicBlock::iterator E = I->getParent()->end();
2376 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2377 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2378 MachineInstr &MemMI = *I;
2379 unsigned Mask = (1 << 12) * Scale - 1;
2380 unsigned Low = Offset & Mask;
2381 unsigned High = Offset - Low;
2382 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2383 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2384 MachineInstrBuilder AddMIB, MemMIB;
2385
2386
2387 AddMIB =
2388 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2392 .addImm(12);
2393 (void)AddMIB;
2394
2396 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2401 .setMIFlags(I->mergeFlagsWith(*Update));
2402 (void)MemMIB;
2403
2404 ++NumConstOffsetFolded;
2405 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2406 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2417
2418
2419 I->eraseFromParent();
2420 PrevI->eraseFromParent();
2421 Update->eraseFromParent();
2422
2423 return NextI;
2424}
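// Illustrative effect (editorial, not part of the original source), for an
// 8-byte load whose 32-bit index register holds 0x45678 = 0x40000 + 0x5678:
//   mov w8, #0x5678
//   movk w8, #4, lsl #16
//   ldr x9, [x0, x8]
// becomes
//   add x8, x0, #0x40, lsl #12
//   ldr x9, [x8, #0x5678]   ; scaled imm12 field holds 0x5678 / 8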
2425
2426bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2427 MachineInstr &MI,
2428 unsigned BaseReg, int Offset) {
2429 switch (MI.getOpcode()) {
2430 default:
2431 break;
2432 case AArch64::SUBXri:
2433 case AArch64::ADDXri:
2434 // Make sure it's a vanilla immediate operand, not a relocation or
2435 // anything else we can't handle.
2436 if (!MI.getOperand(2).isImm())
2437 break;
2438 // Watch out for 1 << 12 shifted value.
2439 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2440 break;
2441
2442
2443
2444 if (MI.getOperand(0).getReg() != BaseReg ||
2445 MI.getOperand(1).getReg() != BaseReg)
2446 break;
2447
2448 int UpdateOffset = MI.getOperand(2).getImm();
2449 if (MI.getOpcode() == AArch64::SUBXri)
2450 UpdateOffset = -UpdateOffset;
2451
2452
2453
2454 int Scale, MinOffset, MaxOffset;
2455 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2456 if (UpdateOffset % Scale != 0)
2457 break;
2458
2459
2460 int ScaledOffset = UpdateOffset / Scale;
2461 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2462 break;
2463
2464
2465
2466 if (!Offset || Offset == UpdateOffset)
2467 return true;
2468 break;
2469 }
2470 return false;
2471}
2472
2473bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2474 MachineInstr &MI,
2475 unsigned IndexReg,
2477
2478
2479 if (MI.getOpcode() == AArch64::MOVKWi &&
2480 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2481
2482
2483 MachineBasicBlock::iterator B = MI.getParent()->begin();
2484 MachineBasicBlock::iterator MBBI = &MI;
2485 MBBI = prev_nodbg(MBBI, B);
2486 if (MBBI == B)
2487 return false;
2489 MachineInstr &MovzMI = *MBBI;
2490
2491 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2492 MovzMI.getOperand(3).getImm() == 0) {
2493 unsigned Low = MovzMI.getOperand(1).getImm();
2494 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2495 Offset = High + Low;
2496
2497 return Offset >> 24 == 0;
2498 }
2499 }
2500 return false;
2501}
2502
2503MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2504 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2505 MachineBasicBlock::iterator E = I->getParent()->end();
2506 MachineInstr &MemMI = *I;
2507 MachineBasicBlock::iterator MBBI = I;
2508
2509 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2510 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2511 TII->getMemScale(MemMI);
2512
2513
2514
2515
2516 if (MIUnscaledOffset != UnscaledOffset)
2517 return E;
2518
2519
2520
2521
2522
2523
2524
2525 if (!isTagStore(MemMI)) {
2526 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2527 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2528 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2529 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2530 return E;
2531 }
2532 }
2533
2534
2535
2536 ModifiedRegUnits.clear();
2537 UsedRegUnits.clear();
2539
2540
2541
2542
2543 const bool BaseRegSP = BaseReg == AArch64::SP;
2544 if (BaseRegSP && needsWinCFI(I->getMF())) {
2545 // FIXME: For now, we always block the optimization over SP in windows
2546 // targets as it requires to adjust the unwind/debug info, messing up
2547 // the unwind info can actually cause a miscompile.
2548 return E;
2549 }
2550
2551 unsigned Count = 0;
2552 MachineBasicBlock *CurMBB = I->getParent();
2553
2555
2556 while (true) {
2559 MachineInstr &MI = *MBBI;
2560
2561
2562
2563 if (!MI.isTransient())
2564 ++Count;
2565
2566
2567 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2568 return MBBI;
2569
2570
2571 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2572 TRI);
2573
2574
2575
2576
2577
2578 if (!ModifiedRegUnits.available(BaseReg) ||
2579 !UsedRegUnits.available(BaseReg) ||
2580 (BaseRegSP && MBBI->mayLoadOrStore()))
2581 return E;
2582 }
2583
2584 if (!VisitSucc || Limit <= Count)
2585 break;
2586
2587
2588
2589 MachineBasicBlock *SuccToVisit = nullptr;
2590 unsigned LiveSuccCount = 0;
2591 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2592 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2593 if (Succ->isLiveIn(*AI)) {
2594 if (LiveSuccCount++)
2595 return E;
2596 if (Succ->pred_size() == 1)
2597 SuccToVisit = Succ;
2598 break;
2599 }
2600 }
2601 }
2602 if (!SuccToVisit)
2603 break;
2604 CurMBB = SuccToVisit;
2606 }
2607
2608 return E;
2609}
2610
2611MachineBasicBlock::iterator
2612AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2613 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2614 MachineBasicBlock::iterator B = I->getParent()->begin();
2615 MachineInstr &MemMI = *I;
2616 MachineBasicBlock::iterator MBBI = I;
2617 MachineFunction &MF = *MemMI.getMF();
2618
2619 MachineBasicBlock::iterator E = I->getParent()->end();
2620 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2621 int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
2622 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2623 AArch64InstrInfo::isPairedLdSt(MemMI)
2624 ? getLdStRegOp(MemMI, 1).getReg()
2625 : AArch64::NoRegister};
2626
2627 // If the load/store is the first instruction in the block, there's
2628 // obviously not any matching update. Ditto if the memory offset isn't zero.
2629 if (MBBI == B || Offset != 0)
2630 return E;
2631
2632 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2633 if (!isTagStore(MemMI)) {
2634 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2635 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2636 return E;
2637 }
2638
2639 const bool BaseRegSP = BaseReg == AArch64::SP;
2640 if (BaseRegSP && needsWinCFI(I->getMF())) {
2641 // FIXME: For now, we always block the optimization over SP in windows
2642 // targets as it requires to adjust the unwind/debug info, messing up
2643 // the unwind info can actually cause a miscompile.
2644 return E;
2645 }
2646
2647 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2648 unsigned RedZoneSize =
2649 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2650
2651
2652
2653 ModifiedRegUnits.clear();
2654 UsedRegUnits.clear();
2655 unsigned Count = 0;
2656 bool MemAccessBeforeSPPreInc = false;
2657 MergeEither = true;
2658 do {
2659 MBBI = prev_nodbg(MBBI, B);
2660 MachineInstr &MI = *MBBI;
2661
2662
2663
2664 if (!MI.isTransient())
2665 ++Count;
2666
2667
2668 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2669
2670
2671 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2672 return E;
2673 return MBBI;
2674 }
2675
2676
2677 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2678
2679
2680
2681 if (!ModifiedRegUnits.available(BaseReg) ||
2682 !UsedRegUnits.available(BaseReg))
2683 return E;
2684
2685
2686
2687
2688
2689 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2690 (DestReg[0] != AArch64::NoRegister &&
2691 !(ModifiedRegUnits.available(DestReg[0]) &&
2692 UsedRegUnits.available(DestReg[0]))) ||
2693 (DestReg[1] != AArch64::NoRegister &&
2694 !(ModifiedRegUnits.available(DestReg[1]) &&
2695 UsedRegUnits.available(DestReg[1]))))
2696 MergeEither = false;
2697
2698
2699
2700
2701 if (BaseRegSP && MBBI->mayLoadOrStore())
2702 MemAccessBeforeSPPreInc = true;
2703 } while (MBBI != B && Count < Limit);
2704 return E;
2705}
2706
2707MachineBasicBlock::iterator
2708AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2709 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2710 MachineBasicBlock::iterator B = I->getParent()->begin();
2711 MachineBasicBlock::iterator E = I->getParent()->end();
2712 MachineInstr &MemMI = *I;
2713 MachineBasicBlock::iterator MBBI = I;
2714
2715
2716
2717 if (MBBI == B)
2718 return E;
2719
2720
2721
2722 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2723 // Bail if the load/store already applies a non-zero shift amount.
2724 if (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)
2725 return E;
2726
2728
2729
2730
2731 ModifiedRegUnits.clear();
2732 UsedRegUnits.clear();
2733 unsigned Count = 0;
2734 do {
2736 MachineInstr &MI = *MBBI;
2737
2738
2739
2740 if (!MI.isTransient())
2741 ++Count;
2742
2743
2744 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2745 return MBBI;
2746 }
2747
2748
2749 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2750
2751
2752
2753 if (!ModifiedRegUnits.available(IndexReg) ||
2754 !UsedRegUnits.available(IndexReg))
2755 return E;
2756
2757 } while (MBBI != B && Count < Limit);
2758 return E;
2759}
2760
2761bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2762 MachineBasicBlock::iterator &MBBI) {
2763 MachineInstr &MI = *MBBI;
2764 // If this is a volatile load, don't mess with it.
2765 if (MI.hasOrderedMemoryRef())
2766 return false;
2767
2768 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2769 return false;
2770
2771
2772
2773 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
2774 return false;
2775
2776
2777 MachineBasicBlock::iterator StoreI;
2778 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2779 ++NumLoadsFromStoresPromoted;
2780
2781
2782
2783 MBBI = promoteLoadFromStore(MBBI, StoreI);
2784 return true;
2785 }
2786 return false;
2787}
2788
2789
2790bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2791 MachineBasicBlock::iterator &MBBI) {
2792 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2793 MachineInstr &MI = *MBBI;
2794 MachineBasicBlock::iterator E = MI.getParent()->end();
2795
2796 if (!TII->isCandidateToMergeOrPair(MI))
2797 return false;
2798
2799
2800 LdStPairFlags Flags;
2801 MachineBasicBlock::iterator MergeMI =
2802 findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true);
2803 if (MergeMI != E) {
2804 ++NumZeroStoresPromoted;
2805
2806
2807
2808 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2809 return true;
2810 }
2811 return false;
2812}
2813
2814
2815
2816bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2817 MachineInstr &MI = *MBBI;
2818 MachineBasicBlock::iterator E = MI.getParent()->end();
2819
2820 if (!TII->isCandidateToMergeOrPair(MI))
2821 return false;
2822
2823
2824 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2825 return false;
2826
2827
2828 if (MI.mayStore() && Subtarget->hasDisableStp())
2829 return false;
2830
2831
2832
2833
2834 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2835 int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2836 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2837
2838 if (Offset > 0)
2839 Offset -= OffsetStride;
2840 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2841 return false;
2842
2843
2844 LdStPairFlags Flags;
2845 MachineBasicBlock::iterator Paired =
2846 findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
2847 if (Paired != E) {
2848
2849
2850 auto Prev = std::prev(MBBI);
2851
2852
2853
2854 MachineMemOperand *MemOp =
2855 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2856
2857
2858
2859
2860 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2861 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2862
2863 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2864 NumFailedAlignmentCheck++;
2865 return false;
2866 }
2867
2868
2869
2870 uint64_t MemAlignment = MemOp->getAlign().value();
2871 uint64_t TypeAlignment =
2872 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2873
2874 if (MemAlignment < 2 * TypeAlignment) {
2875 NumFailedAlignmentCheck++;
2876 return false;
2877 }
2878 }
2879
2880 ++NumPairCreated;
2881 if (TII->hasUnscaledLdStOffset(MI))
2882 ++NumUnscaledPairCreated;
2883
2884 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2885
2886
2887 for (auto I = std::next(Prev); I != MBBI; I++)
2888 updateDefinedRegisters(*I, DefinedInBB, TRI);
2889
2890 return true;
2891 }
2892 return false;
2893}
2894
2895bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
2896 MachineBasicBlock::iterator &MBBI) {
2897 MachineInstr &MI = *MBBI;
2898 MachineBasicBlock::iterator E = MI.getParent()->end();
2899 MachineBasicBlock::iterator Update;
2900
2901
2902
2903
2904
2905
2906 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2907 if (Update != E) {
2908
2909 if (auto NextI = mergeUpdateInsn(MBBI, Update, false,
2910 false,
2911 false)) {
2912 MBBI = *NextI;
2913 return true;
2914 }
2915 }
2916
2917
2918 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2919 return false;
2920
2921
2922
2923
2924
2925
2926 bool MergeEither;
2927 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2928 if (Update != E) {
2929
2930 if (auto NextI = mergeUpdateInsn(MBBI, Update, true,
2931 true, MergeEither)) {
2932 MBBI = *NextI;
2933 return true;
2934 }
2935 }
2936
2937
2938
2939
2940 int UnscaledOffset =
2941 AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2942
2943
2944
2945
2946
2947
2948 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2949 if (Update != E) {
2950
2951 if (auto NextI = mergeUpdateInsn(MBBI, Update, false,
2952 true,
2953 false)) {
2954 MBBI = *NextI;
2955 return true;
2956 }
2957 }
2958
2959 return false;
2960}
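// Editorial summary (not part of the original source) of the three folds
// tried above, in pseudo assembly:
//   ldr x0, [x1]     ; add x1, x1, #8  -->  ldr x0, [x1], #8    (post-index)
//   add x1, x1, #8   ; ldr x0, [x1]    -->  ldr x0, [x1, #8]!   (pre-index)
//   ldr x0, [x1, #8] ; add x1, x1, #8  -->  ldr x0, [x1, #8]!   (pre-index)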
2961
2962bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2963 int Scale) {
2964 MachineInstr &MI = *MBBI;
2965 MachineBasicBlock::iterator E = MI.getParent()->end();
2966 MachineBasicBlock::iterator Update;
2967
2968
2969 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2970 return false;
2971
2972
2973
2974
2975
2976
2977
2978 unsigned Offset;
2979 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
2980
2981 if (Update != E && (Offset & (Scale - 1)) == 0) {
2982
2983 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
2984 return true;
2985 }
2986
2987 return false;
2988}
2989
2990bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2991 bool EnableNarrowZeroStOpt) {
2992 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
2993 bool Modified = false;
2994 // Four transformations to do here:
2995 // 1) Find loads that directly read from stores and promote them by
2996 // replacing with mov instructions. If the store is wider than the load,
2997 // the load will be replaced with a bitfield extract.
2998 // e.g.,
2999 // str w1, [x0, #4]
3000 // ldrh w2, [x0, #6]
3001 // ; becomes
3002 // str w1, [x0, #4]
3003 // lsr w2, w1, #16
3004 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3005 MBBI != E;) {
3006 if (isPromotableLoadFromStore(*MBBI) &&
3007 tryToPromoteLoadFromStore(MBBI))
3008 Modified = true;
3009 else
3010 ++MBBI;
3011 }
3012
3013 // 2) Merge adjacent zero stores into a wider store.
3014 // e.g.,
3015 // strh wzr, [x0]
3016 // strh wzr, [x0, #2]
3017 // ; becomes
3018 // str wzr, [x0]
3019 // e.g.,
3020 // str wzr, [x0]
3021 // str wzr, [x0, #4]
3022 // ; becomes str xzr, [x0]
3023 if (EnableNarrowZeroStOpt)
3024 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3025 MBBI != E;) {
3026 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3027 Modified = true;
3028 else
3029 ++MBBI;
3030 }
3031
3032
3033 // 3) Find loads and stores that can be merged into a single load or store
3034 // pair instruction.
3035 // e.g.,
3036 // ldr x0, [x2]
3037 // ldr x1, [x2, #8]
3038 // ; becomes
3039 // ldp x0, x1, [x2]
3040
3045 // Liveness info is needed to find rename registers for pairing.
3046 if (MBB.getParent()->getRegInfo().tracksLiveness()) {
3047 DefinedInBB.clear();
3048 DefinedInBB.addLiveIns(MBB);
3049 }
3050
3051 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3052 MBBI != E;) {
3053 // Track currently live registers up to this point, to help with
3054 // searching for a rename register on demand.
3055 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3056 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3057 Modified = true;
3058 else
3059 ++MBBI;
3060 }
3061
3062
3063 // 4) Find base register updates that can be merged into the load or store
3064 // as a base-reg writeback.
3065 // e.g., ldr x0, [x2]; add x2, x2, #4
3066 // ; becomes ldr x0, [x2], #4
3067 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3068 MBBI != E;) {
3069 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3070 Modified = true;
3072 else
3073 ++MBBI;
3074 }
3075
3075
3076 // 5) Find a register assigned with a const value that can be combined
3077 // with the load or store, e.g.,
3078 // mov x8, #LargeImm ; = a * (1 << 12) + imm12
3079 // ldr x9, [x0, x8]
3080 // ; becomes
3081 // add x8, x0, a * (1 << 12)
3082 // ldr x9, [x8, imm12]
3083 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3084 MBBI != E;) {
3085 int Scale;
3086 if (isMergeableIndexLdSt(*MBBI, Scale) &&
3087 tryToMergeIndexLdSt(MBBI, Scale))
3088 Modified = true;
3089 else
3090 ++MBBI;
3091 }
3092 return Modified;
3093}
3094
3095bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3096 if (skipFunction(Fn.getFunction()))
3097 return false;
3098
3099 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3100 TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
3101 TRI = Subtarget->getRegisterInfo();
3102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3103
3104 // Resize the modified and used register unit trackers. We do this once
3105 // per function and then clear the register units each time we optimize a
3106 // load or store.
3107 ModifiedRegUnits.init(*TRI);
3108 UsedRegUnits.init(*TRI);
3109 DefinedInBB.init(*TRI);
3110
3111 bool Modified = false;
3112 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3113 for (auto &MBB : Fn) {
3114 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3115 Modified |= M;
3116 }
3117
3118 return Modified;
3119}
3120
3121// createAArch64LoadStoreOptimizationPass - returns an instance of the
3122// load / store optimization pass.
3133FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
3134 return new AArch64LoadStoreOpt();
3135}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
Definition AArch64LoadStoreOptimizer.cpp:682
static bool isPromotableLoadFromStore(MachineInstr &MI)
Definition AArch64LoadStoreOptimizer.cpp:718
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
Definition AArch64LoadStoreOptimizer.cpp:664
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
Definition AArch64LoadStoreOptimizer.cpp:1545
static unsigned getMatchingPairOpcode(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:348
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
Definition AArch64LoadStoreOptimizer.cpp:1648
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:1901
static bool needsWinCFI(const MachineFunction *MF)
Definition AArch64LoadStoreOptimizer.cpp:1641
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:1857
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:1945
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
Definition AArch64LoadStoreOptimizer.cpp:1566
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:1717
static unsigned getPreIndexedOpcode(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:442
#define AARCH64_LOAD_STORE_OPT_NAME
Definition AArch64LoadStoreOptimizer.cpp:89
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
Definition AArch64LoadStoreOptimizer.cpp:968
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
Definition AArch64LoadStoreOptimizer.cpp:711
static unsigned getMatchingWideOpcode(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:329
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
Definition AArch64LoadStoreOptimizer.cpp:274
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
Definition AArch64LoadStoreOptimizer.cpp:2251
static bool isTagStore(const MachineInstr &MI)
Definition AArch64LoadStoreOptimizer.cpp:262
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
Definition AArch64LoadStoreOptimizer.cpp:412
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
Definition AArch64LoadStoreOptimizer.cpp:931
static bool isRewritableImplicitDef(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:836
static unsigned getPostIndexedOpcode(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:551
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
Definition AArch64LoadStoreOptimizer.cpp:736
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
Definition AArch64LoadStoreOptimizer.cpp:693
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
Definition AArch64LoadStoreOptimizer.cpp:630
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
Definition AArch64LoadStoreOptimizer.cpp:802
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:953
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
Definition AArch64LoadStoreOptimizer.cpp:1753
static unsigned getBaseAddressOpcode(unsigned Opc)
Definition AArch64LoadStoreOptimizer.cpp:515
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
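The pass registers itself with the legacy pass manager through this macro. A reconstruction of the invocation (the "aarch64-ldst-opt" option string is recalled from the LLVM tree, not shown in this index):

    INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                    AARCH64_LOAD_STORE_OPT_NAME, false, false)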
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
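These accessors let the pass inspect candidates without switching on opcodes. A minimal sketch of the pairing-adjacency test built from them (a hypothetical helper, simplified: the real check also handles unscaled offsets, pre-increment forms, and offset-range limits):

    static bool sameBaseAdjacent(const MachineInstr &A, const MachineInstr &B) {
      if (getLdStBaseOp(A).getReg() != getLdStBaseOp(B).getReg())
        return false;
      // For the scaled "ui" forms, the immediate is already in units of the
      // access size, so pairable neighbours differ by exactly one.
      int64_t OffA = getLdStOffsetOp(A).getImm();
      int64_t OffB = getLdStOffsetOp(B).getImm();
      return OffA + 1 == OffB || OffB + 1 == OffA;
    }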
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
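A sketch of how these primitives combine during the scan between two candidate instructions, the pattern behind the findMatchingInsn and update-folding searches (First, Second, and BaseReg are illustrative names):

    LiveRegUnits ModifiedRegUnits, UsedRegUnits;
    ModifiedRegUnits.init(*TRI);
    UsedRegUnits.init(*TRI);
    for (MachineInstr &MI : make_range(First, Second))
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
    // The base register may only be rewritten if nothing in between
    // redefines it or reads its old value.
    if (ModifiedRegUnits.available(BaseReg) && UsedRegUnits.available(BaseReg)) {
      // ...safe to fold the index update into the memory instruction.
    }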
bool usesWindowsCFI() const
OpType getOperation() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
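This is the hook behind addDebugSubstitutionsToTable (indexed above). A reduced sketch of the idea, assuming the value lives in operand 0 of the original instruction; MergedOpIdx is a hypothetical name for its position in the merged instruction:

    if (unsigned OldNum = OriginalInstr.peekDebugInstrNum()) {
      // Fetch (or assign) a number for the merged instruction and redirect
      // debug users of the old <instr, operand> pair to the new one.
      unsigned NewNum = MergedInstr.getDebugInstrNum();
      MF->makeDebugValueSubstitution({OldNum, 0u}, {NewNum, MergedOpIdx});
    }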
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
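Taken together, these are the builder calls needed when two adjacent loads are replaced by one pair. A hedged sketch (the opcode and operand names are illustrative; the real rewrite also handles sign-extends, register renaming, and CFI/SEH bookkeeping):

    MachineInstrBuilder MIB =
        BuildMI(*MBB, InsertPt, DL, TII->get(AArch64::LDPXi))
            .addDef(RtA)          // first destination register
            .addDef(RtB)          // second destination register
            .addUse(BaseReg)      // shared base register
            .addImm(PairedOffset) // immediate, in units of the access size
            .cloneMergedMemRefs({&FirstMI, &SecondMI})
            .setMIFlags(FirstMI.mergeFlagsWith(SecondMI));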
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
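These queries gate every transformation: an ordered or volatile access is never a merge candidate, and any intervening instruction that may alias the merge target kills the match. A standalone sketch in the spirit of the file's static mayAlias() helper:

    static bool aliasesAnySeen(MachineInstr &MIa,
                               SmallVectorImpl<MachineInstr *> &MemInsns,
                               AliasAnalysis *AA) {
      if (MIa.hasOrderedMemoryRef())
        return true; // conservatively treat ordered/volatile refs as aliasing
      for (MachineInstr *MIb : MemInsns)
        if (MIa.mayAlias(AA, *MIb, /*UseTBAA=*/false))
          return true;
      return false;
    }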
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e. it does not generate a value that is somehow read in a way that is not represented by the Machine IR.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Wrapper class representing virtual and physical registers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm is a 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
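A round-trip of the two encoders, per the encoding listed above:

    unsigned Enc = AArch64_AM::getShifterImm(AArch64_AM::LSL, 3); // "lsl #3"
    unsigned Amt = AArch64_AM::getShiftValue(Enc);                // == 3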
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
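This range is what the rename-legality walk iterates when checking whether a candidate register is touched; a hypothetical fragment in the spirit of canRenameMOP:

    static bool renameRegIsFreeIn(const MachineInstr &MI, MCPhysReg RenameReg,
                                  const TargetRegisterInfo *TRI) {
      for (const MachineOperand &MOP : phys_regs_and_masks(MI))
        if (MOP.isReg() && TRI->regsOverlap(MOP.getReg(), RenameReg))
          return false; // RenameReg is touched here; pick another register
      return true;
    }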
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
Definition AArch64LoadStoreOptimizer.cpp:3133
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
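next_nodbg/prev_nodbg and instructionsWithoutDebug keep the scan limits meaningful: debug instructions must neither count against LdStLimit/UpdateLimit nor affect codegen. The canonical forward-scan shape, sketched:

    unsigned Count = 0;
    for (auto I = next_nodbg(MBBI, MBB->end());
         I != MBB->end() && Count < LdStLimit;
         I = next_nodbg(I, MBB->end()), ++Count) {
      MachineInstr &MI = *I;
      // ...test MI as a merge candidate, accumulate liveness, check aliasing.
    }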
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.