LLVM: lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp Source File
//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//
47#include
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65
66
67
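// Represents a target-specific pseudo that replaces a G_SHUFFLE_VECTOR: Opc
// is the AArch64 opcode to build (e.g. G_ZIP1, G_REV64, G_EXT), Dst is the
// result register, and SrcOps are the source operands.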
struct ShuffleVectorPseudo {
  unsigned Opc;
  Register Dst;
  SmallVector<SrcOp, 2> SrcOps;
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
  ShuffleVectorPseudo() = default;
};
77
78
79
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  // Materializing the bit pattern as a plain integer constant is only
  // worthwhile when every (non-debug) use is a store.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) {
                  return Use.getOpcode() == TargetOpcode::G_STORE;
                });
}
93
94
void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}
102
103
104
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
107
108 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
109 if (FirstRealElt == M.end())
110 return std::nullopt;
111
112
113 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
114 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
115
116
  // All following shuffle indices must be the successive elements after the
  // first real element, allowing undefs.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;
122
123
124
125
126
127
  // ExpectedElt now holds the index one past the last mask element, modulo
  // 2 * NumElts: the start of the rotation within the concatenated sources.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // If the start of the rotation lies in the second source, the two sources
  // must be swapped; otherwise rebase the immediate onto the first source.
131
132
133
134
135
136
137
138 if (Imm < NumElts)
139 ReverseExt = true;
140 else
141 Imm -= NumElts;
142 return std::make_pair(ReverseExt, Imm);
143}
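// Illustrative example: for a 4-element shuffle, the mask <1, 2, 3, 4> is one
// increasing run through the concatenation of the two sources, so getExtMask
// returns {ReverseExt = false, Imm = 1}; matchEXT below scales Imm by the
// element size in bytes to form the G_EXT byte immediate.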
144
145
146
147
148
149
150
151
152
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
155 if (M.size() != static_cast<size_t>(NumInputElements))
156 return std::nullopt;
157 int NumLHSMatch = 0, NumRHSMatch = 0;
158 int LastLHSMismatch = -1, LastRHSMismatch = -1;
159 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
160 if (M[Idx] == -1) {
161 ++NumLHSMatch;
162 ++NumRHSMatch;
163 continue;
164 }
165 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
166 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
167 }
168 const int NumNeededToMatch = NumInputElements - 1;
169 if (NumLHSMatch == NumNeededToMatch)
170 return std::make_pair(true, LastLHSMismatch);
171 if (NumRHSMatch == NumNeededToMatch)
172 return std::make_pair(false, LastRHSMismatch);
173 return std::nullopt;
174}
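// Illustrative example: with NumInputElements = 4, the mask <0, 1, 6, 3>
// matches the left-hand identity everywhere except lane 2, so isINSMask
// returns {true, 2}: keep the first operand and insert one element at lane 2.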
175
176
177
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
182 Register Dst = MI.getOperand(0).getReg();
183 Register Src = MI.getOperand(1).getReg();
184 LLT Ty = MRI.getType(Dst);
185 unsigned EltSize = Ty.getScalarSizeInBits();
186
187
188 if (EltSize == 64)
189 return false;
190
191 unsigned NumElts = Ty.getNumElements();
192
193
194 for (unsigned LaneSize : {64U, 32U, 16U}) {
195 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
196 unsigned Opcode;
197 if (LaneSize == 64U)
198 Opcode = AArch64::G_REV64;
199 else if (LaneSize == 32U)
200 Opcode = AArch64::G_REV32;
201 else
202 Opcode = AArch64::G_REV16;
203
204 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
205 return true;
206 }
207 }
208
209 return false;
210}
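// Illustrative example: for <8 x s8>, the mask <1, 0, 3, 2, 5, 4, 7, 6>
// reverses the bytes inside each 16-bit block, so isREVMask fires for
// LaneSize == 16 and the shuffle becomes G_REV16.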
211
212
213
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}
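// Illustrative example: with 8 elements, <0, 8, 2, 10, 4, 12, 6, 14>
// interleaves the even lanes of both sources (G_TRN1), while
// <1, 9, 3, 11, 5, 13, 7, 15> interleaves the odd lanes (G_TRN2).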
229
230
231
232
233
234
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}
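// Illustrative example: with 8 elements, <0, 2, 4, 6, 8, 10, 12, 14>
// concatenates the even lanes of both sources (G_UZP1), and
// <1, 3, 5, 7, 9, 11, 13, 15> concatenates the odd lanes (G_UZP2).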
250
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  unsigned OperandOrder;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
  Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}
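// Illustrative example: with 8 elements, <0, 8, 1, 9, 2, 10, 3, 11>
// interleaves the low halves of the two sources (G_ZIP1); the matching
// high-half mask <4, 12, 5, 13, 6, 14, 7, 15> selects G_ZIP2.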
267
268
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for a pattern like this:
  //
  //   %scalar:gpr(s64) = COPY $x0
  //   %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  //   %cst0:gpr(s32) = G_CONSTANT i32 0
  //   %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar, %cst0
  //   %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins, %undef, shufflemask(0, 0)
  //
  // ...which is equivalent to a G_DUP of %scalar.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;

  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                    InsMI->getOperand(1).getReg(), MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}
307
308
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
  // Lanes at or past NumElements index into the second shuffle operand.
  auto *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                   MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);

  if (NumElements <= Lane)
    Lane -= NumElements;

  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}
330
bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, treat it as a splat of lane 0.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}
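// In short: a splat shuffle of lane 0 of a G_INSERT_VECTOR_ELT into undef, or
// of any lane of a G_BUILD_VECTOR, is rewritten as a G_DUP of the scalar that
// produced that lane.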
347
348
349
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();
352
353
354 if (M[0] < 0)
355 return false;
356
357
358
359
360 unsigned ExpectedElt = M[0];
361 for (unsigned I = 1; I < NumElts; ++I) {
362
363
364 ++ExpectedElt;
365 if (ExpectedElt == NumElts)
366 ExpectedElt = 0;
367
368 if (M[I] < 0)
369 continue;
370 if (ExpectedElt != static_cast<unsigned>(M[I]))
371 return false;
372 }
373
374 return true;
375}
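// Illustrative example: for <4 x s32>, the single-source mask <2, 3, 0, 1> is
// an in-register rotation, so matchEXT below emits G_EXT %v1, %v1 with a byte
// immediate of 2 * 4 = 8.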
376
bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    // The two-source form did not match; try the single-source form, which
    // requires the second operand to be undef.
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}
406
407
408
void applyShuffleVectorPseudo(MachineInstr &MI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}
415
416
417
418
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.SrcOps[2].getImm() == 0)
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  else {
    // The immediate operand of G_EXT must be materialized as a constant.
    auto Cst =
        MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  MI.eraseFromParent();
}
432
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");
  MachineIRBuilder MIRBuilder(MI);
  auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
  auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
  MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
  MI.eraseFromParent();
}
445
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}
453
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  if (VecTy.isScalableVector())
    return;

  // Create a stack slot and store the vector into it.
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element, then reload the whole vector.
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}
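// In short: a G_INSERT_VECTOR_ELT with a non-constant index is lowered by
// spilling the vector to a stack slot, clamping the index with
// (Idx & (NumElts - 1)) * EltSize so the address stays in bounds, storing the
// scalar at that offset, and reloading the whole vector.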
497
498
499
500
501
502
503
504
505
506
507
508
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? V1 : V2;
  Register SrcVec = V1;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = V2;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}
535
void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
}
551
552
553
554
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}
566
567
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}
577
void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
  MI.eraseFromParent();
}
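// Illustrative example: "%d:_(<4 x s32>) = G_ASHR %v, %splat_of_16" becomes
// "%d = G_VASHR %v, 16"; isVShiftRImm above only accepts splat amounts in
// [1, element bits], matching the immediate forms of the NEON vector shifts.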
588
589
590
591
592
593
594
595
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or it is already a legal arithmetic
  // immediate, there is nothing to do.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // Try adjusting the immediate and the predicate together so that the new
  // immediate becomes cheaper to encode.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // x slt c => x sle c - 1, x sge c => x sgt c - 1,
    // valid as long as c is not the smallest negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // x ult c => x ule c - 1, x uge c => x ugt c - 1, valid when c != 0.
    assert(C != 0 && "C should not be zero here!");
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // x sle c => x slt c + 1, x sgt c => x sge c + 1,
    // valid as long as c is not the largest signed value.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // x ule c => x ult c + 1, x ugt c => x uge c + 1,
    // valid as long as c is not the largest unsigned value.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid (truncating first for 32-bit
  // compares), and return the updated constant and predicate if so.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  // Otherwise, only do the adjustment when the new constant takes fewer
  // instructions to materialize than the original one.
  auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, Size, Insn);
    return Insn.size();
  };

  if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
    return {{C, P}};

  return std::nullopt;
}
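// Worked example (illustrative): 0x1001 is not a legal arithmetic immediate,
// so "icmp ult %x, 0x1001" is rewritten as "icmp ule %x, 0x1000"; 0x1000 is
// encodable as a 12-bit immediate shifted left by 12, so no extra MOV is
// needed to materialize the constant.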
694
695
696
697
698
699
700
701
702
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}
715
void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}
729
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
732 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
733 Register Src1Reg = MI.getOperand(1).getReg();
734 const LLT SrcTy = MRI.getType(Src1Reg);
735 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
736
  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
739 return false;
740
741
742 if (*LaneIdx >= SrcTy.getNumElements())
743 return false;
744
745 if (DstTy != SrcTy)
746 return false;
747
  unsigned ScalarSize = SrcTy.getScalarSizeInBits();

  unsigned Opc = 0;
752 switch (SrcTy.getNumElements()) {
753 case 2:
754 if (ScalarSize == 64)
755 Opc = AArch64::G_DUPLANE64;
756 else if (ScalarSize == 32)
757 Opc = AArch64::G_DUPLANE32;
758 break;
759 case 4:
760 if (ScalarSize == 32)
761 Opc = AArch64::G_DUPLANE32;
762 else if (ScalarSize == 16)
763 Opc = AArch64::G_DUPLANE16;
764 break;
765 case 8:
766 if (ScalarSize == 8)
767 Opc = AArch64::G_DUPLANE8;
768 else if (ScalarSize == 16)
769 Opc = AArch64::G_DUPLANE16;
770 break;
771 case 16:
772 if (ScalarSize == 8)
773 Opc = AArch64::G_DUPLANE8;
774 break;
775 default:
776 break;
777 }
  if (!Opc)
779 return false;
780
781 MatchInfo.first = Opc;
782 MatchInfo.second = *LaneIdx;
783 return true;
784}
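// Illustrative example: a <4 x s32> shuffle with mask <1, 1, 1, 1> becomes
// G_DUPLANE32 of lane 1; applyDupLane below widens a 64-bit source to 128
// bits first, since the DUPLANE pseudos expect a quadword source vector.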
785
void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
789 Register Src1Reg = MI.getOperand(1).getReg();
790 const LLT SrcTy = MRI.getType(Src1Reg);
791
792 B.setInstrAndDebugLoc(MI);
793 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
794
795 Register DupSrc = MI.getOperand(1).getReg();
796
797
798 if (SrcTy.getSizeInBits() == 64) {
799 auto Undef = B.buildUndef(SrcTy);
800 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
801 {Src1Reg, Undef.getReg(0)})
802 .getReg(0);
803 }
804 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
805 MI.eraseFromParent();
806}
807
bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
811 const LLT SrcTy = MRI.getType(Src1Reg);
812 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
813 return false;
814 return SrcTy.isVector() && !SrcTy.isScalable() &&
815 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
816}
817
void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
822 const LLT SrcTy = MRI.getType(Src1Reg);
823 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
824 "Expected a fixed length vector");
825
826 for (int I = 0; I < SrcTy.getNumElements(); ++I)
827 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
828 MI.eraseFromParent();
829}
830
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR, so don't
  // lower G_BUILD_VECTORs which could match those patterns.
  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
    return false;

  return getAArch64VectorSplat(MI, MRI).has_value();
}
842
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
846 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
847 {MI.getOperand(1).getReg()});
848 MI.eraseFromParent();
849}
850
851
852
unsigned getCmpOperandFoldingProfit(Register CmpOp,
                                    const MachineRegisterInfo &MRI) {
  // Only fold if there is a single (non-debug) use of the operand.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;
857
858
859 auto IsSupportedExtend = [&](const MachineInstr &MI) {
860 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
861 return true;
862 if (MI.getOpcode() != TargetOpcode::G_AND)
863 return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
867 return false;
868 uint64_t Mask = ValAndVReg->Value.getZExtValue();
869 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
870 };
871
  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
874 return 1;
875
876 unsigned Opc = Def->getOpcode();
877 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
878 Opc != TargetOpcode::G_LSHR)
879 return 0;
880
  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
884 return 0;
885 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
889
890
891
892 if (IsSupportedExtend(*ShiftLHS))
893 return (ShiftAmt <= 4) ? 2 : 1;
894
895 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
896 if (Ty.isVector())
897 return 0;
898 unsigned ShiftSize = Ty.getSizeInBits();
899 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
900 (ShiftSize == 64 && ShiftAmt <= 63))
901 return 1;
902 return 0;
903}
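// The weight computed above feeds the operand-swap heuristic for G_ICMP: a
// supported extend counts 1, an extend folded into a shift of at most 4
// counts 2, and a plain in-range shift counts 1, mirroring what CMP can fold
// for free as a shifted/extended register operand.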
904
905
906
908 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
909
910
911
912
913
914
915
916
917
918
919
923 return false;
924
927 auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
930 };
931
932
933
936
937
938
941}
942
void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}
953
954
955
956
961 assert(DstTy.isVector() && "Expected vector types only?");
962 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
963 switch (CC) {
964 default:
968 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
970 };
974 };
978 };
982 };
986 };
990 };
991 }
992}
993
994
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}
1013
1014
1017 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1019
1021
1022 Register Dst = CmpMI.getReg(0);
1026
1027 LLT DstTy = MRI.getType(Dst);
1028
1029 bool Invert = false;
1034
1035
1036
1037
1038
1041 } else
1043
1044
1046
1047 const bool NoNans =
1048 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1049
1050 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1053 CmpRes = Cmp(MIB);
1054 else {
1055 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1056 auto Cmp2Dst = Cmp2(MIB);
1057 auto Cmp1Dst = Cmp(MIB);
1058 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1059 }
1060 if (Invert)
1062 MRI.replaceRegWith(Dst, CmpRes);
1063 MI.eraseFromParent();
1064}
1065
1066
1069
1070
1071 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    auto ConstVal =
        getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1075 if (!ConstVal.has_value())
1076 return true;
1077 }
1078
1079 return false;
1080}
1081
1085 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1086 Register DstReg = B.buildUndef(DstTy).getReg(0);
1087
1088 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1089 Register SrcReg = GBuildVec->getSourceReg(I);
    if (mi_match(SrcReg, MRI, m_GImplicitDef()))
      continue;
1092 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1093 DstReg =
1094 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1095 }
1096 B.buildCopy(GBuildVec->getReg(0), DstReg);
1097 GBuildVec->eraseFromParent();
1098}
1099
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncated value.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores of values no wider than 64 bits.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}
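// Illustrative example: "%t:_(s8) = G_TRUNC %x:_(s32); G_STORE %t, %p" is
// matched here, and the apply below stores %x directly so that instruction
// selection can emit a truncating store.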
1112
void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}
1121
1122
1123
1124
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, LLT());
}
1139
1140
1141
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  if (Unmerge.getNumDefs() != 2)
1146 return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1148 return false;
1149
1150 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1152 return false;
1153
1155 if (!Ext)
1156 return false;
1157
1160 auto LowestVal =
1162 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1163 return false;
1164
1166 return false;
1167
1168 MatchInfo = ExtSrc1;
1169 return true;
1170}
1171
1176
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Swap the two destination registers and read from SrcReg instead.
  Register Dst1 = MI.getOperand(0).getReg();
1178 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1179 MI.getOperand(1).setReg(Dst1);
1180 MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}
1183
1184
1185
1186
1187
1188
1190
1191 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1193}
1194
1197 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1198 "Expected a G_MUL instruction");
1199
1200
1201 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1204 Helper.fewerElementsVector(
1205 MI, 0,
1207}
1208
1209class AArch64PostLegalizerLoweringImpl : public Combiner {
1210protected:
1211 const CombinerHelper Helper;
1212 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1213 const AArch64Subtarget &STI;
1214
1215public:
1216 AArch64PostLegalizerLoweringImpl(
1217 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1218 GISelCSEInfo *CSEInfo,
1219 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1220 const AArch64Subtarget &STI);
1221
  static const char *getName() { return "AArch64PostLegalizerLoweringImpl"; }
1223
1224 bool tryCombineAll(MachineInstr &I) const override;
1225
1226private:
1227#define GET_GICOMBINER_CLASS_MEMBERS
1228#include "AArch64GenPostLegalizeGILowering.inc"
1229#undef GET_GICOMBINER_CLASS_MEMBERS
1230};
1231
1232#define GET_GICOMBINER_IMPL
1233#include "AArch64GenPostLegalizeGILowering.inc"
1234#undef GET_GICOMBINER_IMPL
1235
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, nullptr, CSEInfo),
      Helper(Observer, B, true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
1249
class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
1266}
1267
void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
1274
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
1280
bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;
  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*EnableOpt*/ true,
                     F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // Later combiners perform DCE, so a full DCE pass here is unnecessary.
  CInfo.EnableFullDCE = false;
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}
1301
char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)
namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm