//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
//
//===----------------------------------------------------------------------===//

// (Most of the original #include directives are elided in this listing.)
#include "llvm/IR/IntrinsicsAArch64.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

// Command-line options. Several cl::opt declarations in this region are only
// partially preserved in this listing; the surviving fragments are:
//   "sve-prefer-fixed-over-scalable-if-equal" (cl::Hidden)
//   cl::desc("Penalty of calling a function that requires a change to PSTATE.SM")
//   cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM")
//   cl::desc("The cost of a histcnt instruction")
//   cl::desc("The number of instructions to search for a redundant dmb")

namespace {
class TailFoldingOption {
  // (TailFoldingOpts member declarations elided in this listing.)

  bool NeedsDefault = true;

  void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }

  void setNeedsDefault(bool V) { NeedsDefault = V; }

  void setEnableBit(TailFoldingOpts Bit) {
    EnableBits |= Bit;
    DisableBits &= ~Bit;
  }

  void setDisableBit(TailFoldingOpts Bit) {
    EnableBits &= ~Bit;
    DisableBits |= Bit;
  }

  TailFoldingOpts getBits(TailFoldingOpts DefaultBits) const {
    TailFoldingOpts Bits = TailFoldingOpts::Disabled;

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits |= EnableBits;
    Bits &= ~DisableBits;

    return Bits;
  }

  void reportError(std::string Opt) {
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
    // (fatal-error call elided in this listing)
  }

public:

  void operator=(const std::string &Val) {
    // An empty string means "use the default tail-folding behaviour".
    if (Val.empty()) {
      setNeedsDefault(true);
      return;
    }

    // The user is explicitly overriding the default.
    setNeedsDefault(false);

    // Split the option string into its '+'-separated components.
    SmallVector<StringRef, 4> TailFoldTypes;
    StringRef(Val).split(TailFoldTypes, '+');

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
      else
        reportError(Val);
    }
  }

  bool satisfies(TailFoldingOpts DefaultBits, TailFoldingOpts Required) const {
    return (getBits(DefaultBits) & Required) == Required;
  }
};
} // namespace
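
// A sketch of how the parser above interprets the option string (illustrative
// only, not part of the original source):
//   -sve-tail-folding=all+noreverse      -> InitialBits = All, DisableBits |= Reverse
//   -sve-tail-folding=default+reductions -> NeedsDefault = true, EnableBits |= Reductions
//   -sve-tail-folding=reverse            -> treated as Disabled plus EnableBits |= Reverse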
185

// Option controlling SVE tail-folding. (The cl::opt declaration head and its
// trailing cl::location argument are elided in this listing.)
    "sve-tail-folding",
    cl::desc(
        "Control the use of vectorisation using tail-folding for SVE where the"
        " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
        "\ndisabled (Initial) No loop types will vectorize using "
        "tail-folding"
        "\ndefault (Initial) Uses the default tail-folding settings for "
        "the target CPU"
        "\nall (Initial) All legal loop types will vectorize using "
        "tail-folding"
        "\nsimple (Initial) Use tail-folding for simple loops (not "
        "reductions or recurrences)"
        "\nreductions Use tail-folding for loops containing reductions"
        "\nnoreductions Inverse of above"
        "\nrecurrences Use tail-folding for loops containing fixed order "
        "recurrences"
        "\nnorecurrences Inverse of above"
        "\nreverse Use tail-folding for loops requiring reversed "
        "predicates"
        "\nnoreverse Inverse of above"),

// Experimental options to enable auto-vectorisation while compiling for
// streaming mode (off by default until codegen support matures).
static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
    "enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

static cl::opt<bool> EnableScalableAutovecInStreamingMode(
    "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

// Returns true if the call is to one of the SME ABI support routines.
static bool isSMEABIRoutineCall(const CallInst &CI) {
  const auto *F = CI.getCalledFunction();
  return F && StringSwitch<bool>(F->getName())
                  .Case("__arm_sme_state", true)
                  .Case("__arm_tpidr2_save", true)
                  .Case("__arm_tpidr2_restore", true)
                  .Case("__arm_za_disable", true)
                  .Default(false);
}

/// Returns true if the function has explicit operations that may only be
/// lowered to instructions that are incompatible with a streaming-mode
/// change; conservatively, any call to inline asm, to an intrinsic, or to an
/// SME ABI routine counts.
static bool hasPossibleIncompatibleOps(const Function *F) {
  for (const Instruction &I : instructions(F)) {
    if (isa<CallBase>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallBase>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
         isSMEABIRoutineCall(cast<CallInst>(I)))) {
      return true;
    }
  }
  return false;
}
251
255 StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
257 FeatureStr.split(Features, ",");
259}
260
262 return F.hasFnAttribute("fmv-features");
263}
264
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);
268
269
270
274 }
275
277 return false;
278
  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) {
    return false;
  }

  return BaseT::areInlineCompatible(Caller, Callee);
}
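
// Note on the checks above: inlining is refused whenever the caller/callee
// SME attributes would require a lazy ZA save, a PSTATE.SM change, or
// preservation of ZT0/ZA state, because inlining would remove the call
// boundary at which that state is saved and restored.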
289
294 return false;
295
296
297
298
299
300
301
302
303
        auto FVTy = dyn_cast<FixedVectorType>(Ty);
        return FVTy &&
               FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
308 }))
309 return false;
310
311 return true;
312}
313
314unsigned
316 unsigned DefaultCallPenalty) const {
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
339 if (F == Call.getCaller())
343 }
344
345 return DefaultCallPenalty;
346}
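
// The default call penalty above is scaled up for calls that require a change
// to PSTATE.SM (see the "Penalty of calling a function that requires a change
// to PSTATE.SM" option earlier): a streaming-mode transition is considerably
// more expensive than an ordinary call.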
347
353}
354
355
356
357
359
361 return 0;
362
363 if (Val < 0)
364 Val = ~Val;
365
366
369 return Insn.size();
370}
371
372
376
378 if (BitSize == 0)
379 return ~0U;
380
381
382 APInt ImmVal = Imm;
383 if (BitSize & 0x3f)
384 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
385
386
387
389 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
393 }
394
395 return std::max(1, Cost);
396}
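
// A rough sketch of the immediate costing above: the (sign-extended) value is
// examined in 64-bit chunks and each chunk is priced by the number of
// mov/movk-style instructions needed to materialise it, so e.g. 0x1234 costs
// one instruction while an arbitrary 64-bit constant can cost up to four.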
397
403
405
406
407 if (BitSize == 0)
409
410 unsigned ImmIdx = ~0U;
411 switch (Opcode) {
412 default:
414 case Instruction::GetElementPtr:
415
416 if (Idx == 0)
419 case Instruction::Store:
420 ImmIdx = 0;
421 break;
422 case Instruction::Add:
423 case Instruction::Sub:
424 case Instruction::Mul:
425 case Instruction::UDiv:
426 case Instruction::SDiv:
427 case Instruction::URem:
428 case Instruction::SRem:
429 case Instruction::And:
430 case Instruction::Or:
431 case Instruction::Xor:
432 case Instruction::ICmp:
433 ImmIdx = 1;
434 break;
435
436 case Instruction::Shl:
437 case Instruction::LShr:
438 case Instruction::AShr:
439 if (Idx == 1)
441 break;
442 case Instruction::Trunc:
443 case Instruction::ZExt:
444 case Instruction::SExt:
445 case Instruction::IntToPtr:
446 case Instruction::PtrToInt:
447 case Instruction::BitCast:
448 case Instruction::PHI:
449 case Instruction::Call:
450 case Instruction::Select:
451 case Instruction::Ret:
452 case Instruction::Load:
453 break;
454 }
455
456 if (Idx == ImmIdx) {
457 int NumConstants = (BitSize + 63) / 64;
462 }
464}
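
// The ImmIdx selected above marks the operand position where the instruction
// can encode an immediate directly (e.g. the right-hand side of add/and/icmp,
// or the stored value of a store); an immediate in that slot is cheap, while
// other positions may require materialising the constant into a register.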
465
471
473
474
475 if (BitSize == 0)
477
478
479
480
481 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
483
484 switch (IID) {
485 default:
487 case Intrinsic::sadd_with_overflow:
488 case Intrinsic::uadd_with_overflow:
489 case Intrinsic::ssub_with_overflow:
490 case Intrinsic::usub_with_overflow:
491 case Intrinsic::smul_with_overflow:
492 case Intrinsic::umul_with_overflow:
493 if (Idx == 1) {
494 int NumConstants = (BitSize + 63) / 64;
499 }
500 break;
501 case Intrinsic::experimental_stackmap:
502 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
504 break;
505 case Intrinsic::experimental_patchpoint_void:
506 case Intrinsic::experimental_patchpoint:
507 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
509 break;
510 case Intrinsic::experimental_gc_statepoint:
511 if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
513 break;
514 }
516}
517
521 if (TyWidth == 32 || TyWidth == 64)
523
525}
526
530}
531
533 Type *BucketPtrsTy = ICA.getArgTypes()[0];
534 Type *EltTy = ICA.getArgTypes()[1];
535 unsigned TotalHistCnts = 1;
536
538
541
542
543
  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
545 unsigned EC = VTy->getElementCount().getKnownMinValue();
548
549
550 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
551
552 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
554
556 TotalHistCnts = EC / NaturalVectorWidth;
557 }
558
560}
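
// Sketch of the histogram costing above: the bucket-pointer vector is
// legalised to a natural SVE gather width (2 x 64-bit or 4 x 32-bit
// elements), and wider element counts are charged as TotalHistCnts
// independent histcnt operations.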
561
565
566
567
568
  if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))
573
574 switch (ICA.getID()) {
575 case Intrinsic::experimental_vector_histogram_add:
576 if (!ST->hasSVE2())
579 case Intrinsic::umin:
580 case Intrinsic::umax:
581 case Intrinsic::smin:
582 case Intrinsic::smax: {
583 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
584 MVT::v8i16, MVT::v2i32, MVT::v4i32,
585 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
586 MVT::nxv2i64};
588
589 if (LT.second == MVT::v2i64)
590 return LT.first * 2;
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
592 return LT.first;
593 break;
594 }
595 case Intrinsic::sadd_sat:
596 case Intrinsic::ssub_sat:
597 case Intrinsic::uadd_sat:
598 case Intrinsic::usub_sat: {
599 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
600 MVT::v8i16, MVT::v2i32, MVT::v4i32,
601 MVT::v2i64};
603
604
605 unsigned Instrs =
606 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
608 return LT.first * Instrs;
609 break;
610 }
611 case Intrinsic::abs: {
612 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
613 MVT::v8i16, MVT::v2i32, MVT::v4i32,
614 MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
617 return LT.first;
618 break;
619 }
620 case Intrinsic::bswap: {
621 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
622 MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
625 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
626 return LT.first;
627 break;
628 }
629 case Intrinsic::stepvector: {
632
633
634 if (LT.first > 1) {
638 Cost += AddCost * (LT.first - 1);
639 }
641 }
642 case Intrinsic::vector_extract:
643 case Intrinsic::vector_insert: {
644
645
646
647
648
649
652 break;
653
656 bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
659
660
662 break;
663
673 break;
674 }
675 case Intrinsic::bitreverse: {
676 static const CostTblEntry BitreverseTbl[] = {
677 {Intrinsic::bitreverse, MVT::i32, 1},
678 {Intrinsic::bitreverse, MVT::i64, 1},
679 {Intrinsic::bitreverse, MVT::v8i8, 1},
680 {Intrinsic::bitreverse, MVT::v16i8, 1},
681 {Intrinsic::bitreverse, MVT::v4i16, 2},
682 {Intrinsic::bitreverse, MVT::v8i16, 2},
683 {Intrinsic::bitreverse, MVT::v2i32, 2},
684 {Intrinsic::bitreverse, MVT::v4i32, 2},
685 {Intrinsic::bitreverse, MVT::v1i64, 2},
686 {Intrinsic::bitreverse, MVT::v2i64, 2},
687 };
689 const auto *Entry =
691 if (Entry) {
692
693
696 return LegalisationCost.first * Entry->Cost + 1;
697
698 return LegalisationCost.first * Entry->Cost;
699 }
700 break;
701 }
702 case Intrinsic::ctpop: {
703 if (!ST->hasNEON()) {
704
706 }
717 };
719 MVT MTy = LT.second;
721
722
724 RetTy->getScalarSizeInBits()
725 ? 1
726 : 0;
727 return LT.first * Entry->Cost + ExtraCost;
728 }
729 break;
730 }
731 case Intrinsic::sadd_with_overflow:
732 case Intrinsic::uadd_with_overflow:
733 case Intrinsic::ssub_with_overflow:
734 case Intrinsic::usub_with_overflow:
735 case Intrinsic::smul_with_overflow:
736 case Intrinsic::umul_with_overflow: {
737 static const CostTblEntry WithOverflowCostTbl[] = {
738 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
739 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
740 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
741 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
742 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
743 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
744 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
745 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
746 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
747 {Intrinsic::usub_with_overflow, MVT::i8, 3},
748 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
749 {Intrinsic::usub_with_overflow, MVT::i16, 3},
750 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
751 {Intrinsic::usub_with_overflow, MVT::i32, 1},
752 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
753 {Intrinsic::usub_with_overflow, MVT::i64, 1},
754 {Intrinsic::smul_with_overflow, MVT::i8, 5},
755 {Intrinsic::umul_with_overflow, MVT::i8, 4},
756 {Intrinsic::smul_with_overflow, MVT::i16, 5},
757 {Intrinsic::umul_with_overflow, MVT::i16, 4},
758 {Intrinsic::smul_with_overflow, MVT::i32, 2},
759 {Intrinsic::umul_with_overflow, MVT::i32, 2},
760 {Intrinsic::smul_with_overflow, MVT::i64, 3},
761 {Intrinsic::umul_with_overflow, MVT::i64, 3},
762 };
767 return Entry->Cost;
768 break;
769 }
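  // The table above encodes that 32/64-bit overflow adds/subs map onto a
  // single flags-setting operation (plus the overflow-bit consumer), i8/i16
  // variants need extra extends, and the multiply variants also need a
  // high-half check, hence their larger costs.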
770 case Intrinsic::fptosi_sat:
771 case Intrinsic::fptoui_sat: {
773 break;
774 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
777
778
779 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
780 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
781 LT.second == MVT::v2f64)) {
783 (LT.second == MVT::f64 && MTy == MVT::i32) ||
784 (LT.second == MVT::f32 && MTy == MVT::i64)))
785 return LT.first;
786
787 if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&
790 }
791
792
793 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
800 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
801 (LT.second == MVT::f16 && MTy == MVT::i64) ||
802 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
804 return LT.first;
805
809
810
811 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
814
815
816 if ((LT.second.getScalarType() == MVT::f32 ||
817 LT.second.getScalarType() == MVT::f64 ||
818 LT.second.getScalarType() == MVT::f16) &&
820 Type *LegalTy =
822 if (LT.second.isVector())
823 LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());
826 LegalTy, {LegalTy, LegalTy});
829 LegalTy, {LegalTy, LegalTy});
831 return LT.first * Cost +
832 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
833 : 1);
834 }
835
836
839 if (LT.second.isVector()) {
840 FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());
842 }
848 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
850 if (IsSigned) {
851 Type *CondTy = RetTy->getWithNewBitWidth(1);
856 }
857 return LT.first * Cost;
858 }
859 case Intrinsic::fshl:
860 case Intrinsic::fshr: {
862 break;
863
864
867 break;
868
871
873 {Intrinsic::fshl, MVT::v4i32, 3},
874 {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
875 {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
876 {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
877
878
879 const auto *Entry =
880 CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
881 if (Entry)
882 return LegalisationCost.first * Entry->Cost;
883 }
884
    if (!RetTy->isIntegerTy())
887 break;
888
889
890
891 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
892 RetTy->getScalarSizeInBits() < 64) ||
893 (RetTy->getScalarSizeInBits() % 64 != 0);
894 unsigned ExtraCost = HigherCost ? 1 : 0;
895 if (RetTy->getScalarSizeInBits() == 32 ||
896 RetTy->getScalarSizeInBits() == 64)
897 ExtraCost = 0;
898
899 else if (HigherCost)
900 ExtraCost = 1;
901 else
902 break;
903 return TyL.first + ExtraCost;
904 }
905 case Intrinsic::get_active_lane_mask: {
910 if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
912
913
914
915
916
917
918
919
920
921 return RetTy->getNumElements() * 2;
922 }
923 }
924 break;
925 }
926 case Intrinsic::experimental_vector_match: {
    auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
929 unsigned SearchSize = NeedleTy->getNumElements();
930 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
931
932
933
934
935
937 if (isa(RetTy))
940 }
941 break;
942 }
943 default:
944 break;
945 }
947}
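
// The functions below implement AArch64-specific InstCombine folds for SVE
// intrinsics; they are dispatched from instCombineIntrinsic() further down.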
948
949
950
954 auto RequiredType = II.getType();
955
  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
957 assert(PN && "Expected Phi Node!");
958
959
960 if (!PN->hasOneUse())
961 return std::nullopt;
962
963 for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
965 if (!Reinterpret ||
966 Reinterpret->getIntrinsicID() !=
967 Intrinsic::aarch64_sve_convert_to_svbool ||
968 RequiredType != Reinterpret->getArgOperand(0)->getType())
969 return std::nullopt;
970 }
971
972
976
977 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<IntrinsicInst>(PN->getIncomingValue(I));
979 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
981 }
982
983
985}
986
987
988
989
990
991
992
993
994
995
996static std::optional<Instruction *>
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
999 if (!BinOp)
1000 return std::nullopt;
1001
1002 auto IntrinsicID = BinOp->getIntrinsicID();
1003 switch (IntrinsicID) {
1004 case Intrinsic::aarch64_sve_and_z:
1005 case Intrinsic::aarch64_sve_bic_z:
1006 case Intrinsic::aarch64_sve_eor_z:
1007 case Intrinsic::aarch64_sve_nand_z:
1008 case Intrinsic::aarch64_sve_nor_z:
1009 case Intrinsic::aarch64_sve_orn_z:
1010 case Intrinsic::aarch64_sve_orr_z:
1011 break;
1012 default:
1013 return std::nullopt;
1014 }
1015
1016 auto BinOpPred = BinOp->getOperand(0);
1017 auto BinOpOp1 = BinOp->getOperand(1);
1018 auto BinOpOp2 = BinOp->getOperand(2);
1019
  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
1021 if (!PredIntr ||
1022 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1023 return std::nullopt;
1024
1025 auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
1027 if (PredOpTy != II.getType())
1028 return std::nullopt;
1029
1032 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1033 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
1034 if (BinOpOp1 == BinOpOp2)
1035 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
1036 else
1038 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1039
1040 auto NarrowedBinOp =
1043}
1044
1045static std::optional<Instruction *>
1047
  if (isa<PHINode>(II.getArgOperand(0)))
1050
1052 return BinOpCombine;
1053
1054
  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))
1057 return std::nullopt;
1058
1060 Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
1061
  const auto *IVTy = cast<VectorType>(II.getType());
1063
1064
1065 while (Cursor) {
1066
1067
    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
1069 if (CursorVTy->getElementCount().getKnownMinValue() <
1070 IVTy->getElementCount().getKnownMinValue())
1071 break;
1072
1073
1074 if (Cursor->getType() == IVTy)
1075 EarliestReplacement = Cursor;
1076
    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
1078
1079
1080 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1081 Intrinsic::aarch64_sve_convert_to_svbool ||
1082 IntrinsicCursor->getIntrinsicID() ==
1083 Intrinsic::aarch64_sve_convert_from_svbool))
1084 break;
1085
1086 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
1087 Cursor = IntrinsicCursor->getOperand(0);
1088 }
1089
1090
1091
1092 if (!EarliestReplacement)
1093 return std::nullopt;
1094
1096}
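
// The loop above walks a chain of convert.to/from.svbool reinterprets and
// replaces the final convert.from.svbool with the earliest value in the chain
// that already has the required predicate type, discarding the redundant
// intermediate conversions.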
1097
1099
1100 Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
                          m_Value(UncastedPred)))))
1104
1105
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
1108 Pred = UncastedPred;
1109
  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
1112}
1113
1114
1115
1116static std::optional<Instruction *>
1118 bool hasInactiveVector) {
1119 int PredOperand = hasInactiveVector ? 1 : 0;
1120 int ReplaceOperand = hasInactiveVector ? 0 : 1;
1124 }
1125 return std::nullopt;
1126}
1127
1128
1129
1130static std::optional<Instruction *>
      !isa<llvm::UndefValue>(II.getOperand(0)) &&
      !isa<llvm::PoisonValue>(II.getOperand(0))) {
1137 }
1139}
1140
1141
1142static std::optional<Instruction *>
1144 int PredPos) {
1147 }
1148 return std::nullopt;
1149}
1150
1151
1152
1153static std::optional<Instruction *>
1158 if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
1160 auto VecT = StructT->getElementType(0);
1162 for (unsigned i = 0; i < StructT->getNumElements(); i++) {
1163 ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
1164 : ConstantInt::get(VecT, 0));
1165 }
1167 } else
1168 Node = RetTy->isFPOrFPVectorTy() ? ConstantFP::get(RetTy, 0.0)
1169 : ConstantInt::get(II.getType(), 0);
1170
1173 }
1174 return std::nullopt;
1175}
1176
1179
1180 auto *OpPredicate = II.getOperand(0);
1183
1187}
1188
  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
1192 if (!Pg)
1193 return std::nullopt;
1194
1195 if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1196 return std::nullopt;
1197
  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
1200 if (PTruePattern != AArch64SVEPredPattern::vl1)
1201 return std::nullopt;
1202
1203
1206 II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
1207 Insert->insertBefore(&II);
1208 Insert->takeName(&II);
1209
1211}
1212
1215
  auto *RetTy = cast<ScalableVectorType>(II.getType());
1218 II.getArgOperand(0));
1221}
1222
1226
1227
1229 return II_NA;
1230
1231
  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
1233 if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1234 return std::nullopt;
1235
  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
1238 if (PTruePattern != AArch64SVEPredPattern::all)
1239 return std::nullopt;
1240
1241
  auto *SplatValue =
      dyn_cast_or_null<ConstantInt>(getSplatValue(II.getArgOperand(2)));
1244 if (!SplatValue || !SplatValue->isZero())
1245 return std::nullopt;
1246
1247
  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
1249 if (!DupQLane ||
1250 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1251 return std::nullopt;
1252
1253
  auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
1255 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1256 return std::nullopt;
1257
  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
1259 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1260 return std::nullopt;
1261
1262
1263
1264 if (!isa(VecIns->getArgOperand(0)))
1265 return std::nullopt;
1266
  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
1268 return std::nullopt;
1269
  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
1271 if (!ConstVec)
1272 return std::nullopt;
1273
  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
1276 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1277 return std::nullopt;
1278
1279 unsigned NumElts = VecTy->getNumElements();
1280 unsigned PredicateBits = 0;
1281
1282
1283 for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
1285 if (!Arg)
1286 return std::nullopt;
1287 if (!Arg->isZero())
1288 PredicateBits |= 1 << (I * (16 / NumElts));
1289 }
1290
1291
1292 if (PredicateBits == 0) {
1294 PFalse->takeName(&II);
1296 }
1297
1298
1299 unsigned Mask = 8;
1300 for (unsigned I = 0; I < 16; ++I)
1301 if ((PredicateBits & (1 << I)) != 0)
1302 Mask |= (I % 8);
1303
1304 unsigned PredSize = Mask & -Mask;
1307
1308
1309 for (unsigned I = 0; I < 16; I += PredSize)
1310 if ((PredicateBits & (1 << I)) == 0)
1311 return std::nullopt;
1312
1313 auto *PTruePat =
1314 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
1316 {PredType}, {PTruePat});
1318 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
1319 auto *ConvertFromSVBool =
1321 {II.getType()}, {ConvertToSVBool});
1322
1325}
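
// The fold above matches a predicated compare of a dupq'd constant predicate
// pattern against zero; when the set bits repeat uniformly it is replaced by
// a single ptrue of the corresponding element width, reinterpreted through
// svbool as needed.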
1326
1329 Value *Pg = II.getArgOperand(0);
1330 Value *Vec = II.getArgOperand(1);
1331 auto IntrinsicID = II.getIntrinsicID();
1332 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
1333
1334
1337
1338
1339
1343 auto *OldBinOp = cast(Vec);
1344 auto OpC = OldBinOp->getOpcode();
1345 auto *NewLHS =
1347 auto *NewRHS =
1350 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());
1352 }
1353 }
1354
  auto *C = dyn_cast<Constant>(Pg);
1356 if (IsAfter && C && C->isNullValue()) {
1357
1360 Extract->insertBefore(&II);
1361 Extract->takeName(&II);
1363 }
1364
  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
1366 if (!IntrPG)
1367 return std::nullopt;
1368
1369 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1370 return std::nullopt;
1371
  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
1374
1375
1377 if (!MinNumElts)
1378 return std::nullopt;
1379
1380 unsigned Idx = MinNumElts - 1;
1381
1382
1383 if (IsAfter)
1385
1386
1387
1388
  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
1390 if (Idx >= PgVTy->getMinNumElements())
1391 return std::nullopt;
1392
1393
1396 Extract->insertBefore(&II);
1397 Extract->takeName(&II);
1399}
1400
1403
1404
1405
1406
1407
1408
1409
1410 Value *Pg = II.getArgOperand(0);
1412 Value *Vec = II.getArgOperand(2);
1414
1416 return std::nullopt;
1417
  switch (cast<IntegerType>(Ty)->getBitWidth()) {
1420 default:
1421 return std::nullopt;
1422 case 16:
1424 break;
1425 case 32:
1427 break;
1428 case 64:
1430 break;
1431 }
1432
      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
1438 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
1441}
1442
1446
1447
1448 auto *AllPat =
1449 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
1451 {II.getType()}, {AllPat});
1452 auto *RDFFR =
1456}
1457
1458static std::optional<Instruction *>
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();
1461
1462 if (Pattern == AArch64SVEPredPattern::all) {
1463 Constant *StepVal = ConstantInt::get(II.getType(), NumElts);
1467 }
1468
1470
1471 return MinNumElts && NumElts >= MinNumElts
1473 II, ConstantInt::get(II.getType(), MinNumElts)))
1474 : std::nullopt;
1475}
1476
1479 Value *PgVal = II.getArgOperand(0);
1480 Value *OpVal = II.getArgOperand(1);
1481
1482
1483
1484 if (PgVal == OpVal &&
1485 (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
1486 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
1487 Value *Ops[] = {PgVal, OpVal};
1489
1490 auto *PTest =
1493
1495 }
1496
  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(PgVal);
  IntrinsicInst *Op = dyn_cast<IntrinsicInst>(OpVal);

  if (!Pg || !Op)
1501 return std::nullopt;
1502
1504
1505 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
1506 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
1510
1512
1515 }
1516
1517
1518
1519
1520 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
1521 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
1522 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
1523 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
1524 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
1525 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
1526 (OpIID == Intrinsic::aarch64_sve_and_z) ||
1527 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
1528 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
1529 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
1530 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
1531 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
1532 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
1535
1538
1540 }
1541
1542 return std::nullopt;
1543}
1544
1545template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
1546static std::optional<Instruction *>
1548 bool MergeIntoAddendOp) {
1549 Value *P = II.getOperand(0);
1550 Value *MulOp0, *MulOp1, *AddendOp, *Mul;
1551 if (MergeIntoAddendOp) {
1552 AddendOp = II.getOperand(1);
1554 } else {
1555 AddendOp = II.getOperand(2);
1557 }
1558
1561 return std::nullopt;
1562
1563 if (->hasOneUse())
1564 return std::nullopt;
1565
1567 if (II.getType()->isFPOrFPVectorTy()) {
1569
1570
    if (FAddFlags != cast<CallInst>(Mul)->getFastMathFlags())
1572 return std::nullopt;
1574 return std::nullopt;
1576 }
1577
1579 if (MergeIntoAddendOp)
1581 {P, AddendOp, MulOp0, MulOp1}, FMFSource);
1582 else
1584 {P, MulOp0, MulOp1, AddendOp}, FMFSource);
1585
1587}
1588
1589static std::optional<Instruction *>
1591 Value *Pred = II.getOperand(0);
1592 Value *PtrOp = II.getOperand(1);
1593 Type *VecTy = II.getType();
1594
1595
1597 return II_NA;
1598
1601 Load->copyMetadata(II);
1603 }
1604
1610}
1611
1612static std::optional<Instruction *>
1614 Value *VecOp = II.getOperand(0);
1615 Value *Pred = II.getOperand(1);
1616 Value *PtrOp = II.getOperand(2);
1617
1620 Store->copyMetadata(II);
1622 }
1623
1628}
1629
1631 switch (Intrinsic) {
1632 case Intrinsic::aarch64_sve_fmul_u:
1633 return Instruction::BinaryOps::FMul;
1634 case Intrinsic::aarch64_sve_fadd_u:
1635 return Instruction::BinaryOps::FAdd;
1636 case Intrinsic::aarch64_sve_fsub_u:
1637 return Instruction::BinaryOps::FSub;
1638 default:
1639 return Instruction::BinaryOpsEnd;
1640 }
1641}
1642
1643static std::optional<Instruction *>
1645
1646 if (II.isStrictFP())
1647 return std::nullopt;
1648
1649 auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
1654 return std::nullopt;
1656 BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
1658}
1659
1660
1661
1664 auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
1667 return std::nullopt;
1668
1669 auto *Mod = II.getModule();
1671 II.setCalledFunction(NewDecl);
1672
1673 return &II;
1674}
1675
1676
1677
1678static std::optional<Instruction *>
1682
1683
1685 }
1687}
1688
1691 if (auto II_U =
1693 return II_U;
1695 Intrinsic::aarch64_sve_mla>(
1696 IC, II, true))
1697 return MLA;
1699 Intrinsic::aarch64_sve_mad>(
1700 IC, II, false))
1701 return MAD;
1702 return std::nullopt;
1703}
1704
1705static std::optional<Instruction *>
1707 if (auto II_U =
1709 return II_U;
1710 if (auto FMLA =
1712 Intrinsic::aarch64_sve_fmla>(IC, II,
1713 true))
1714 return FMLA;
1715 if (auto FMAD =
1717 Intrinsic::aarch64_sve_fmad>(IC, II,
1718 false))
1719 return FMAD;
1720 if (auto FMLA =
1722 Intrinsic::aarch64_sve_fmla>(IC, II,
1723 true))
1724 return FMLA;
1725 return std::nullopt;
1726}
1727
1728static std::optional<Instruction *>
1730 if (auto FMLA =
1732 Intrinsic::aarch64_sve_fmla>(IC, II,
1733 true))
1734 return FMLA;
1735 if (auto FMAD =
1737 Intrinsic::aarch64_sve_fmad>(IC, II,
1738 false))
1739 return FMAD;
1740 if (auto FMLA_U =
1742 Intrinsic::aarch64_sve_fmla_u>(
1743 IC, II, true))
1744 return FMLA_U;
1746}
1747
1748static std::optional<Instruction *>
1750 if (auto II_U =
1752 return II_U;
1753 if (auto FMLS =
1755 Intrinsic::aarch64_sve_fmls>(IC, II,
1756 true))
1757 return FMLS;
1758 if (auto FMSB =
1760 Intrinsic::aarch64_sve_fnmsb>(
1761 IC, II, false))
1762 return FMSB;
1763 if (auto FMLS =
1765 Intrinsic::aarch64_sve_fmls>(IC, II,
1766 true))
1767 return FMLS;
1768 return std::nullopt;
1769}
1770
1771static std::optional<Instruction *>
1773 if (auto FMLS =
1775 Intrinsic::aarch64_sve_fmls>(IC, II,
1776 true))
1777 return FMLS;
1778 if (auto FMSB =
1780 Intrinsic::aarch64_sve_fnmsb>(
1781 IC, II, false))
1782 return FMSB;
1783 if (auto FMLS_U =
1785 Intrinsic::aarch64_sve_fmls_u>(
1786 IC, II, true))
1787 return FMLS_U;
1789}
1790
1793 if (auto II_U =
1795 return II_U;
1797 Intrinsic::aarch64_sve_mls>(
1798 IC, II, true))
1799 return MLS;
1800 return std::nullopt;
1801}
1802
1806 auto *OpPredicate = II.getOperand(0);
1807 auto *OpMultiplicand = II.getOperand(1);
1808 auto *OpMultiplier = II.getOperand(2);
1809
1810
1811 auto IsUnitSplat = [](auto *I) {
1813 if (!SplatValue)
1814 return false;
1816 };
1817
1818
1819
1820 auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
1822 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
1823 return false;
1824
1825 auto *SplatValue = IntrI->getOperand(2);
1827 };
1828
1829 if (IsUnitSplat(OpMultiplier)) {
1830
1831 OpMultiplicand->takeName(&II);
1833 } else if (IsUnitDup(OpMultiplier)) {
1834
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
1836 auto *DupPg = DupInst->getOperand(1);
1837
1838
1839 if (OpPredicate == DupPg) {
1840 OpMultiplicand->takeName(&II);
1842 }
1843 }
1844
1846}
1847
1850 Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
1852 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
1853 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
1854
1855
1856
1857 if (auto *ScalarArg = getSplatValue(UnpackArg)) {
1858 ScalarArg =
1864 }
1865
1866 return std::nullopt;
1867}
1870 auto *OpVal = II.getOperand(0);
1871 auto *OpIndices = II.getOperand(1);
  VectorType *VTy = cast<VectorType>(II.getType());
1873
1874
1875
  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
1877 if (!SplatValue ||
1878 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
1879 return std::nullopt;
1880
1881
1882
1884 auto *VectorSplat =
1886
1889}
1890
1895 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
1896 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
1897
1898
1899
1900 if ((match(II.getArgOperand(0),
1901 m_Intrinsic(m_Intrinsic(m_Value(A)))) &&
1902 match(II.getArgOperand(1),
1903 m_Intrinsic(m_Intrinsic(m_Value(B))))) ||
1904 (match(II.getArgOperand(0), m_Intrinsic(m_Value(A))) &&
1905 match(II.getArgOperand(1), m_Intrinsic(m_Value(B))))) {
    auto *TyA = cast<ScalableVectorType>(A->getType());
1907 if (TyA == B->getType() &&
1915 }
1916 }
1917
1918 return std::nullopt;
1919}
1920
1923
1924
  if (match(II.getArgOperand(0),
            m_Intrinsic<Intrinsic::aarch64_sve_uzp1>(m_Value(A), m_Value(B))) &&
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
                                     m_Specific(A), m_Specific(B))))
    return IC.replaceInstUsesWith(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
1932
1933 return std::nullopt;
1934}
1935
1936static std::optional<Instruction *>
1938 Value *Mask = II.getOperand(0);
1939 Value *BasePtr = II.getOperand(1);
1940 Value *Index = II.getOperand(2);
1943
1944
1946 return II_NA;
1947
1948
1949
1950
1951 Value *IndexBase;
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1954 Align Alignment =
1955 BasePtr->getPointerAlignment(II.getDataLayout());
1956
1958 BasePtr, IndexBase);
1963 }
1964
1965 return std::nullopt;
1966}
1967
1968static std::optional<Instruction *>
1970 Value *Val = II.getOperand(0);
1971 Value *Mask = II.getOperand(1);
1972 Value *BasePtr = II.getOperand(2);
1973 Value *Index = II.getOperand(3);
1975
1976
1977
1978
1979 Value *IndexBase;
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
1982 Align Alignment =
1983 BasePtr->getPointerAlignment(II.getDataLayout());
1984
1986 BasePtr, IndexBase);
1988
1990 }
1991
1992 return std::nullopt;
1993}
1994
1998 Value *Pred = II.getOperand(0);
1999 Value *Vec = II.getOperand(1);
2000 Value *DivVec = II.getOperand(2);
2001
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
2004 if (!SplatConstantInt)
2005 return std::nullopt;
2006
2008 const int64_t DivisorValue = Divisor.getSExtValue();
2009 if (DivisorValue == -1)
2010 return std::nullopt;
2011 if (DivisorValue == 1)
2013
2015 Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
2017 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2019 }
2022 Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
2024 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2026 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2028 }
2029
2030 return std::nullopt;
2031}
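
// Illustrative sketch of the sdiv fold above: dividing by a splatted power of
// two becomes an arithmetic shift with rounding (asrd), e.g.
//   sdiv x, splat(8)  -> asrd x, #3
//   sdiv x, splat(-8) -> neg(asrd x, #3)
// A divisor of 1 forwards the dividend unchanged and -1 is left alone.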
2032
2034 size_t VecSize = Vec.size();
2035 if (VecSize == 1)
2036 return true;
2038 return false;
2039 size_t HalfVecSize = VecSize / 2;
2040
2041 for (auto LHS = Vec.begin(), RHS = Vec.begin() + HalfVecSize;
2043 if (*LHS != nullptr && *RHS != nullptr) {
2045 continue;
2046 else
2047 return false;
2048 }
2049 if (!AllowPoison)
2050 return false;
2051 if (*LHS == nullptr && *RHS != nullptr)
2053 }
2054
2055 Vec.resize(HalfVecSize);
2057 return true;
2058}
2059
2060
2061
2064 Value *CurrentInsertElt = nullptr, *Default = nullptr;
  if (!match(II.getOperand(0),
             m_Intrinsic<Intrinsic::vector_insert>(
                 m_Value(Default), m_Value(CurrentInsertElt), m_Value())) ||
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
2069 return std::nullopt;
2070 auto IIScalableTy = cast(II.getType());
2071
2072
  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
2076 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2077 CurrentInsertElt = InsertElt->getOperand(0);
2078 }
2079
2080 bool AllowPoison =
      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
2083 return std::nullopt;
2084
2085
2087 for (size_t I = 0; I < Elts.size(); I++) {
2088 if (Elts[I] == nullptr)
2089 continue;
2092 }
2093 if (InsertEltChain == nullptr)
2094 return std::nullopt;
2095
2096
2097
2098
2099
2100 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
2101 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2102 IIScalableTy->getMinNumElements() /
2103 PatternWidth;
2104
2107 auto *WideShuffleMaskTy =
2109
2113 auto WideBitcast =
2117 WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);
2118 auto NarrowBitcast =
2120
2122}
2123
2126 Value *A = II.getArgOperand(0);
2127 Value *B = II.getArgOperand(1);
2130
2131 return std::nullopt;
2132}
2133
2136 Value *Pred = II.getOperand(0);
2137 Value *Vec = II.getOperand(1);
2138 Value *Shift = II.getOperand(2);
2139
2140
2141 Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())) &&
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())))
2146
2147 return std::nullopt;
2148
2149
2150
2151
2152
2153 if (!isa(MergedValue) && (MergedValue, m_NonNegative()) &&
2155 return std::nullopt;
2156
2157
2158
2160 return std::nullopt;
2161
2163 {II.getType()}, {Pred, Vec, Shift});
2164
2166}
2167
2170 Value *Vec = II.getOperand(0);
2171
2174
2175 return std::nullopt;
2176}
2177
2180
  auto *NI = II.getNextNonDebugInstruction();
  // (LookaheadThreshold initialisation elided in this listing.)
  auto CanSkipOver = [](Instruction *I) {
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  };
2186 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2187 auto *NIBB = NI->getParent();
2188 NI = NI->getNextNonDebugInstruction();
2189 if (!NI) {
2190 if (auto *SuccBB = NIBB->getUniqueSuccessor())
2191 NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();
2192 else
2193 break;
2194 }
2195 }
  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
2197 if (NextII && II.isIdenticalTo(NextII))
2199
2200 return std::nullopt;
2201}
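
// The lookahead above lets InstCombine drop a dmb that is followed, ignoring
// instructions with no memory effects or side effects and within a small
// instruction budget, by an identical dmb: back-to-back identical barriers
// are redundant.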
2202
2203std::optional<Instruction *>
2207 switch (IID) {
2208 default:
2209 break;
2210 case Intrinsic::aarch64_dmb:
2212 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
2213 case Intrinsic::aarch64_sve_fcvt_f16f32:
2214 case Intrinsic::aarch64_sve_fcvt_f16f64:
2215 case Intrinsic::aarch64_sve_fcvt_f32f16:
2216 case Intrinsic::aarch64_sve_fcvt_f32f64:
2217 case Intrinsic::aarch64_sve_fcvt_f64f16:
2218 case Intrinsic::aarch64_sve_fcvt_f64f32:
2219 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
2220 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
2221 case Intrinsic::aarch64_sve_fcvtx_f32f64:
2222 case Intrinsic::aarch64_sve_fcvtzs:
2223 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
2224 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
2225 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
2226 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
2227 case Intrinsic::aarch64_sve_fcvtzu:
2228 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
2229 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
2230 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
2231 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
2232 case Intrinsic::aarch64_sve_scvtf:
2233 case Intrinsic::aarch64_sve_scvtf_f16i32:
2234 case Intrinsic::aarch64_sve_scvtf_f16i64:
2235 case Intrinsic::aarch64_sve_scvtf_f32i64:
2236 case Intrinsic::aarch64_sve_scvtf_f64i32:
2237 case Intrinsic::aarch64_sve_ucvtf:
2238 case Intrinsic::aarch64_sve_ucvtf_f16i32:
2239 case Intrinsic::aarch64_sve_ucvtf_f16i64:
2240 case Intrinsic::aarch64_sve_ucvtf_f32i64:
2241 case Intrinsic::aarch64_sve_ucvtf_f64i32:
2243 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
2244 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
2245 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
2246 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
2248 case Intrinsic::aarch64_sve_st1_scatter:
2249 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
2250 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
2251 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
2252 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
2253 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
2254 case Intrinsic::aarch64_sve_st1dq:
2255 case Intrinsic::aarch64_sve_st1q_scatter_index:
2256 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
2257 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
2258 case Intrinsic::aarch64_sve_st1wq:
2259 case Intrinsic::aarch64_sve_stnt1:
2260 case Intrinsic::aarch64_sve_stnt1_scatter:
2261 case Intrinsic::aarch64_sve_stnt1_scatter_index:
2262 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
2263 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
2265 case Intrinsic::aarch64_sve_st2:
2266 case Intrinsic::aarch64_sve_st2q:
2268 case Intrinsic::aarch64_sve_st3:
2269 case Intrinsic::aarch64_sve_st3q:
2271 case Intrinsic::aarch64_sve_st4:
2272 case Intrinsic::aarch64_sve_st4q:
2274 case Intrinsic::aarch64_sve_addqv:
2275 case Intrinsic::aarch64_sve_and_z:
2276 case Intrinsic::aarch64_sve_bic_z:
2277 case Intrinsic::aarch64_sve_brka_z:
2278 case Intrinsic::aarch64_sve_brkb_z:
2279 case Intrinsic::aarch64_sve_brkn_z:
2280 case Intrinsic::aarch64_sve_brkpa_z:
2281 case Intrinsic::aarch64_sve_brkpb_z:
2282 case Intrinsic::aarch64_sve_cntp:
2283 case Intrinsic::aarch64_sve_compact:
2284 case Intrinsic::aarch64_sve_eor_z:
2285 case Intrinsic::aarch64_sve_eorv:
2286 case Intrinsic::aarch64_sve_eorqv:
2287 case Intrinsic::aarch64_sve_nand_z:
2288 case Intrinsic::aarch64_sve_nor_z:
2289 case Intrinsic::aarch64_sve_orn_z:
2290 case Intrinsic::aarch64_sve_orr_z:
2291 case Intrinsic::aarch64_sve_orv:
2292 case Intrinsic::aarch64_sve_orqv:
2293 case Intrinsic::aarch64_sve_pnext:
2294 case Intrinsic::aarch64_sve_rdffr_z:
2295 case Intrinsic::aarch64_sve_saddv:
2296 case Intrinsic::aarch64_sve_uaddv:
2297 case Intrinsic::aarch64_sve_umaxv:
2298 case Intrinsic::aarch64_sve_umaxqv:
2299 case Intrinsic::aarch64_sve_cmpeq:
2300 case Intrinsic::aarch64_sve_cmpeq_wide:
2301 case Intrinsic::aarch64_sve_cmpge:
2302 case Intrinsic::aarch64_sve_cmpge_wide:
2303 case Intrinsic::aarch64_sve_cmpgt:
2304 case Intrinsic::aarch64_sve_cmpgt_wide:
2305 case Intrinsic::aarch64_sve_cmphi:
2306 case Intrinsic::aarch64_sve_cmphi_wide:
2307 case Intrinsic::aarch64_sve_cmphs:
2308 case Intrinsic::aarch64_sve_cmphs_wide:
2309 case Intrinsic::aarch64_sve_cmple_wide:
2310 case Intrinsic::aarch64_sve_cmplo_wide:
2311 case Intrinsic::aarch64_sve_cmpls_wide:
2312 case Intrinsic::aarch64_sve_cmplt_wide:
2313 case Intrinsic::aarch64_sve_facge:
2314 case Intrinsic::aarch64_sve_facgt:
2315 case Intrinsic::aarch64_sve_fcmpeq:
2316 case Intrinsic::aarch64_sve_fcmpge:
2317 case Intrinsic::aarch64_sve_fcmpgt:
2318 case Intrinsic::aarch64_sve_fcmpne:
2319 case Intrinsic::aarch64_sve_fcmpuo:
2320 case Intrinsic::aarch64_sve_ld1_gather:
2321 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
2322 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
2323 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
2324 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
2325 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
2326 case Intrinsic::aarch64_sve_ld1q_gather_index:
2327 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
2328 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
2329 case Intrinsic::aarch64_sve_ld1ro:
2330 case Intrinsic::aarch64_sve_ld1rq:
2331 case Intrinsic::aarch64_sve_ld1udq:
2332 case Intrinsic::aarch64_sve_ld1uwq:
2333 case Intrinsic::aarch64_sve_ld2_sret:
2334 case Intrinsic::aarch64_sve_ld2q_sret:
2335 case Intrinsic::aarch64_sve_ld3_sret:
2336 case Intrinsic::aarch64_sve_ld3q_sret:
2337 case Intrinsic::aarch64_sve_ld4_sret:
2338 case Intrinsic::aarch64_sve_ld4q_sret:
2339 case Intrinsic::aarch64_sve_ldff1:
2340 case Intrinsic::aarch64_sve_ldff1_gather:
2341 case Intrinsic::aarch64_sve_ldff1_gather_index:
2342 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
2343 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
2344 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
2345 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
2346 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
2347 case Intrinsic::aarch64_sve_ldnf1:
2348 case Intrinsic::aarch64_sve_ldnt1:
2349 case Intrinsic::aarch64_sve_ldnt1_gather:
2350 case Intrinsic::aarch64_sve_ldnt1_gather_index:
2351 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
2352 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
2354 case Intrinsic::aarch64_sve_prf:
2355 case Intrinsic::aarch64_sve_prfb_gather_index:
2356 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
2357 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
2358 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
2359 case Intrinsic::aarch64_sve_prfd_gather_index:
2360 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
2361 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
2362 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
2363 case Intrinsic::aarch64_sve_prfh_gather_index:
2364 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
2365 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
2366 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
2367 case Intrinsic::aarch64_sve_prfw_gather_index:
2368 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
2369 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
2370 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
2372 case Intrinsic::aarch64_neon_fmaxnm:
2373 case Intrinsic::aarch64_neon_fminnm:
2375 case Intrinsic::aarch64_sve_convert_from_svbool:
2377 case Intrinsic::aarch64_sve_dup:
2379 case Intrinsic::aarch64_sve_dup_x:
2381 case Intrinsic::aarch64_sve_cmpne:
2382 case Intrinsic::aarch64_sve_cmpne_wide:
2384 case Intrinsic::aarch64_sve_rdffr:
2386 case Intrinsic::aarch64_sve_lasta:
2387 case Intrinsic::aarch64_sve_lastb:
2389 case Intrinsic::aarch64_sve_clasta_n:
2390 case Intrinsic::aarch64_sve_clastb_n:
2392 case Intrinsic::aarch64_sve_cntd:
2394 case Intrinsic::aarch64_sve_cntw:
2396 case Intrinsic::aarch64_sve_cnth:
2398 case Intrinsic::aarch64_sve_cntb:
2400 case Intrinsic::aarch64_sve_ptest_any:
2401 case Intrinsic::aarch64_sve_ptest_first:
2402 case Intrinsic::aarch64_sve_ptest_last:
2404 case Intrinsic::aarch64_sve_fabd:
2406 case Intrinsic::aarch64_sve_fadd:
2408 case Intrinsic::aarch64_sve_fadd_u:
2410 case Intrinsic::aarch64_sve_fdiv:
2412 case Intrinsic::aarch64_sve_fmax:
2414 case Intrinsic::aarch64_sve_fmaxnm:
2416 case Intrinsic::aarch64_sve_fmin:
2418 case Intrinsic::aarch64_sve_fminnm:
2420 case Intrinsic::aarch64_sve_fmla:
2422 case Intrinsic::aarch64_sve_fmls:
2424 case Intrinsic::aarch64_sve_fmul:
2425 if (auto II_U =
2427 return II_U;
2429 case Intrinsic::aarch64_sve_fmul_u:
2431 case Intrinsic::aarch64_sve_fmulx:
2433 case Intrinsic::aarch64_sve_fnmla:
2435 case Intrinsic::aarch64_sve_fnmls:
2437 case Intrinsic::aarch64_sve_fsub:
2439 case Intrinsic::aarch64_sve_fsub_u:
2441 case Intrinsic::aarch64_sve_add:
2443 case Intrinsic::aarch64_sve_add_u:
2445 Intrinsic::aarch64_sve_mla_u>(
2446 IC, II, true);
2447 case Intrinsic::aarch64_sve_mla:
2449 case Intrinsic::aarch64_sve_mls:
2451 case Intrinsic::aarch64_sve_mul:
2452 if (auto II_U =
2454 return II_U;
2456 case Intrinsic::aarch64_sve_mul_u:
2458 case Intrinsic::aarch64_sve_sabd:
2460 case Intrinsic::aarch64_sve_smax:
2462 case Intrinsic::aarch64_sve_smin:
2464 case Intrinsic::aarch64_sve_smulh:
2466 case Intrinsic::aarch64_sve_sub:
2468 case Intrinsic::aarch64_sve_sub_u:
2470 Intrinsic::aarch64_sve_mls_u>(
2471 IC, II, true);
2472 case Intrinsic::aarch64_sve_uabd:
2474 case Intrinsic::aarch64_sve_umax:
2476 case Intrinsic::aarch64_sve_umin:
2478 case Intrinsic::aarch64_sve_umulh:
2480 case Intrinsic::aarch64_sve_asr:
2482 case Intrinsic::aarch64_sve_lsl:
2484 case Intrinsic::aarch64_sve_lsr:
2486 case Intrinsic::aarch64_sve_and:
2488 case Intrinsic::aarch64_sve_bic:
2490 case Intrinsic::aarch64_sve_eor:
2492 case Intrinsic::aarch64_sve_orr:
2494 case Intrinsic::aarch64_sve_sqsub:
2496 case Intrinsic::aarch64_sve_uqsub:
2498 case Intrinsic::aarch64_sve_tbl:
2500 case Intrinsic::aarch64_sve_uunpkhi:
2501 case Intrinsic::aarch64_sve_uunpklo:
2502 case Intrinsic::aarch64_sve_sunpkhi:
2503 case Intrinsic::aarch64_sve_sunpklo:
2505 case Intrinsic::aarch64_sve_uzp1:
2507 case Intrinsic::aarch64_sve_zip1:
2508 case Intrinsic::aarch64_sve_zip2:
2510 case Intrinsic::aarch64_sve_ld1_gather_index:
2512 case Intrinsic::aarch64_sve_st1_scatter_index:
2514 case Intrinsic::aarch64_sve_ld1:
2516 case Intrinsic::aarch64_sve_st1:
2518 case Intrinsic::aarch64_sve_sdiv:
2520 case Intrinsic::aarch64_sve_sel:
2522 case Intrinsic::aarch64_sve_srshl:
2524 case Intrinsic::aarch64_sve_dupq_lane:
2526 case Intrinsic::aarch64_sve_insr:
2528 }
2529
2530 return std::nullopt;
2531}
2532
2537 SimplifyAndSetOp) const {
2538 switch (II.getIntrinsicID()) {
2539 default:
2540 break;
2541 case Intrinsic::aarch64_neon_fcvtxn:
2542 case Intrinsic::aarch64_neon_rshrn:
2543 case Intrinsic::aarch64_neon_sqrshrn:
2544 case Intrinsic::aarch64_neon_sqrshrun:
2545 case Intrinsic::aarch64_neon_sqshrn:
2546 case Intrinsic::aarch64_neon_sqshrun:
2547 case Intrinsic::aarch64_neon_sqxtn:
2548 case Intrinsic::aarch64_neon_sqxtun:
2549 case Intrinsic::aarch64_neon_uqrshrn:
2550 case Intrinsic::aarch64_neon_uqshrn:
2551 case Intrinsic::aarch64_neon_uqxtn:
2552 SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
2553 break;
2554 }
2555
2556 return std::nullopt;
2557}
2558
2562}
2563
2566 switch (K) {
2576 else
2582 else
2584 }
2586}
2587
2588bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
2590 Type *SrcOverrideTy) {
2591
2592
2595 cast(DstTy)->getElementCount());
2596 };
2597
2598
2599
2600
2601
2602
2604 if ((DstTy) || Args.size() != 2 ||
2605 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
2606 return false;
2607
2608
2609
2610
2611
2612
2613
2614
2615 Type *SrcTy = SrcOverrideTy;
2616 switch (Opcode) {
2617 case Instruction::Add:
2618 case Instruction::Sub:
2619
    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
      if (!SrcTy)
        SrcTy =
            toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
2624 } else
2625 return false;
2626 break;
2627 case Instruction::Mul: {
2628
    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
      if (!SrcTy)
        SrcTy =
            toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
2634 } else if (isa(Args[0]) || isa(Args[1])) {
2635
2636
2637
2643 return false;
2644 if (!SrcTy)
2647 } else
2648 return false;
2649 break;
2650 }
2651 default:
2652 return false;
2653 }
2654
2655
2656
2658 if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())
2659 return false;
2660
2661
2662
2663 assert(SrcTy && "Expected some SrcTy");
2665 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
2666 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
2667 return false;
2668
2669
2671 DstTyL.first * DstTyL.second.getVectorMinNumElements();
2673 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
2674
2675
2676
2677 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
2678}
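
// A sketch of the "widening" idea above: operations such as
//   add(x, sext(v8i8 y))   or   mul(zext(v4i16 a), zext(v4i16 b))
// can be selected as widening instructions (saddw/umull and friends), in
// which case the extension itself is effectively free.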
2679
2680
2681
2682
2683
2684
2685
2687 Type *Src) {
2688
2690 (Src->isScalableTy() && !ST->hasSVE2()))
2691 return false;
2692
2693 if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())
2694 return false;
2695
2696
  auto *AddUser =
      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
2700 if (AddUser && AddUser->getOpcode() == Instruction::Add)
2701 Add = AddUser;
2702
  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
2704 if (!Shr || Shr->getOpcode() != Instruction::LShr)
2705 return false;
2706
  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
2711 return false;
2712
2713
2714
2718 return false;
2719
2720
2723 return true;
2724
2725 return false;
2726}
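
// The chain matched above (ext -> add [-> add] -> lshr -> trunc back to the
// source element width) is the classic averaging idiom; when it can be
// selected as a single halving-add style operation the extension is not
// charged separately.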
2727
2734 assert(ISD && "Invalid opcode");
2735
2736
2737 if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
2740 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
2741
2742
2743
2744 if (SingleUser->getOpcode() == Instruction::Add) {
2745 if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
2748 return 0;
2749 } else
2750 return 0;
2751 }
2752
2753
    if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
2756 return 0;
2757 }
2758
2759
2762 return Cost == 0 ? 0 : 1;
2763 return Cost;
2764 };
2765
2768
2770 return AdjustCost(
2772
2774 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},
2775 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},
2776 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1},
2777 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2},
2778 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},
2779 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},
2780 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},
2781 };
2782
2783 if (ST->hasBF16())
2786 return AdjustCost(Entry->Cost);
2787
2789 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},
2790 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1},
2791 {ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1},
2792 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1},
2793 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 3},
2794 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},
2795 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2},
2796 {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1},
2797 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1},
2798 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2},
2799 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 4},
2800 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1},
2801 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 3},
2802 {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 2},
2803 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 1},
2804 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 3},
2805 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7},
2806 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},
2807 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},
2808 {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},
2809
2810
2841 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},
2842 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},
2843 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},
2844
2845
2862
2863
2865 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
2866 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
2867
2870 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
2871 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
2872 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
2873 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
2874 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
2875
2877 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
2878 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
2879 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
2880 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
2881 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
2882 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
2883
2885 {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1},
2886 {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2},
2887
2890 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1},
2891 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2},
2892 {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2},
2893 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3},
2894 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6},
2895
2896 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},
2897 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},
2904
2905
2912
2913
2920
2921
2926
2927
2932
2933
2936
2937
2944
2945
2948
2949
2956
2957
2964
2965
2970
2971
2980
2981
2988
2989
2998
2999
3008
3009
3014
3015
3022
3023
3028
3029
3038
3039
3048
3049
3058
3059
3063
3064
3068
3069
3073
3074
3078
3079
3083
3084
3088
3089
3090 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3091 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3092 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3093
3094
3095 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3096 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3097 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
3098
3099
3100
3101
3108
3115 };
3116
3117
3118
3119
3120 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
3124 std::pair<InstructionCost, MVT> LT =
3126 unsigned NumElements =
3128 return AdjustCost(
3129 LT.first *
3134 }
3135
3138 return AdjustCost(Entry->Cost);
3139
3145 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2},
3147 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2},
3151 {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4},
3153 {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3},
3155 {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2},
3157 {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8},
3159 {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3160 {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3161 {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3162 {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3163 };
3164
3165 if (ST->hasFullFP16())
3168 return AdjustCost(Entry->Cost);
3169
3177
3178
3179
3180
3184 Opcode, LegalTy, Src, CCH, CostKind, I);
3187 return Part1 + Part2;
3188 }
3189
3190
3191
3196
3197 return AdjustCost(
3199}
3200
3204 unsigned Index) {
3205
3206
3207 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3208 "Invalid opcode");
3209
3210
3211
3213
3214
3215 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
3216
3217
3218
3221 CostKind, Index, nullptr, nullptr);
3222
3223
3227
3228
3229
3230
3231 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3234
3235
3236
3237 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3240
3241 switch (Opcode) {
3242 default:
3244
3245
3246
3247 case Instruction::SExt:
3248 return Cost;
3249
3250
3251
3252 case Instruction::ZExt:
3253 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3254 return Cost;
3255 }
3256
3257
3260}
3261
3266 return Opcode == Instruction::PHI ? 0 : 1;
3268
3269 return 0;
3270}
3271
3272InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
3273 unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
3275 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
3277
3278 if (Index != -1U) {
3279
3281
3282
3283 if (!LT.second.isVector())
3284 return 0;
3285
3286
3287
3288 if (LT.second.isFixedLengthVector()) {
3289 unsigned Width = LT.second.getVectorNumElements();
3290 Index = Index % Width;
3291 }
3292
3293
3294
3295
3296
3297
3298
3300 return 0;
3301
3302
3303
3304
3305
3306 if (I && dyn_cast<LoadInst>(I->getOperand(1)))
3308
3309
3310
3313
3314
3315
3316
3317
3318
3319 }
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336 auto ExtractCanFuseWithFmul = [&]() {
3337
3338 if (Index == 0)
3339 return false;
3340
3341
3342
3343 auto IsAllowedScalarTy = [&](const Type *T) {
3344 return T->isFloatTy() || T->isDoubleTy() ||
3345 (T->isHalfTy() && ST->hasFullFP16());
3346 };
3347
3348
3349 auto IsUserFMulScalarTy = [](const Value *EEUser) {
3350
3351 const auto *BO = dyn_cast<BinaryOperator>(EEUser);
3352 return BO && BO->getOpcode() == BinaryOperator::FMul &&
3353 !BO->getType()->isVectorTy();
3354 };
3355
3356
3357
3358 auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
3359 auto RegWidth =
3362 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
3363 };
3364
3365
3366
3367 if (!isa<FixedVectorType>(Val) || !IsAllowedScalarTy(Val->getScalarType()))
3368 return false;
3369
3370 if (Scalar) {
3372 for (auto *U : Scalar->users()) {
3373 if (!IsUserFMulScalarTy(U))
3374 return false;
3375
3376
3377 UserToExtractIdx[U];
3378 }
3379 if (UserToExtractIdx.empty())
3380 return false;
3381 for (auto &[S, U, L] : ScalarUserAndIdx) {
3382 for (auto *U : S->users()) {
3383 if (UserToExtractIdx.find(U) != UserToExtractIdx.end()) {
3384 auto *FMul = cast<BinaryOperator>(U);
3385 auto *Op0 = FMul->getOperand(0);
3386 auto *Op1 = FMul->getOperand(1);
3387 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
3389 break;
3390 }
3391 }
3392 }
3393 }
3394 for (auto &[U, L] : UserToExtractIdx) {
3395 if (!IsExtractLaneEquivalentToZero(Index, Val->getScalarSizeInBits()) &&
3397 return false;
3398 }
3399 } else {
3400 const auto *EE = cast<ExtractElementInst>(I);
3401
3402 const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());
3403 if (!IdxOp)
3404 return false;
3405
3406 return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
3407 if (!IsUserFMulScalarTy(U))
3408 return false;
3409
3410
3411
3412 const auto *BO = cast<BinaryOperator>(U);
3413 const auto *OtherEE = dyn_cast<ExtractElementInst>(
3414 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
3415 if (OtherEE) {
3416 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
3417 if (!IdxOp)
3418 return false;
3419 return IsExtractLaneEquivalentToZero(
3420 cast(OtherEE->getIndexOperand())
3421 ->getValue()
3422 .getZExtValue(),
3423 OtherEE->getType()->getScalarSizeInBits());
3424 }
3425 return true;
3426 });
3427 }
3428 return true;
3429 };
3430
3431 if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
3432 ExtractCanFuseWithFmul())
3433 return 0;
3434
3435
3436 return ST->getVectorInsertExtractBaseCost();
3437}
3438
3441 unsigned Index, Value *Op0,
3443 bool HasRealUse =
3444 Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
3445 return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);
3446}
3447
3451 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
3452 return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,
3453 ScalarUserAndIdx);
3454}
3455
3459 unsigned Index) {
3460 return getVectorInstrCostHelper(I.getOpcode(), Val, Index,
3461 true , &I);
3462}
3463
3465 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
3467 if (isa<ScalableVectorType>(Ty))
3472 return DemandedElts.popcount() * (Insert + Extract) *
3474}
3475
3481
3482
3483
3484
3485
3486 if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3489
3490
3493 Op2Info, Args, CxtI);
3494
3495
3498
3499 switch (ISD) {
3500 default:
3502 Op2Info);
3505
3506
3507
3508
3510 Instruction::Add, Ty, CostKind,
3515 Instruction::Select, Ty, CostKind,
3519 return Cost;
3520 }
3521 [[fallthrough]];
3526
3527
3528
3535 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
3536 }
3537 }
3538
3539
3540
3541
3542 if (!VT.isVector() && VT.getSizeInBits() > 64)
3544
3546 Opcode, Ty, CostKind, Op1Info, Op2Info);
3549
3550
3551 if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
3552 ->getPrimitiveSizeInBits()
3553 .getFixedValue() < 128) {
3562
3564 if (nullptr != Entry)
3565 return Entry->Cost;
3566 }
3567
3568
3569 if (LT.second.getScalarType() == MVT::i8)
3571 else if (LT.second.getScalarType() == MVT::i16)
3573 return Cost;
3574 } else {
3575
3576
3577
3578
3581 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
3584 return (4 + DivCost) * VTy->getNumElements();
3585 }
3586 }
3587
3588
3590 CostKind, Op1Info, Op2Info);
3592 Op1Info, Op2Info);
3593 }
3594
3595
3596
3598 }
3599 return Cost;
3600 }
3602
3603
3604 if (LT.second == MVT::v2i64 && ST->hasSVE())
3605 return LT.first;
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
3620 return LT.first;
3621 return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
3624 nullptr, nullptr) *
3625 2 +
3627 nullptr, nullptr));
3635
3636
3637 return LT.first;
3638
3640
3642 (Ty->isHalfTy() && ST->hasFullFP16())) &&
3643 CxtI &&
3647 return 0;
3648 [[fallthrough]];
3651
3652
3655 return 2 * LT.first;
3657 return LT.first;
3658 [[fallthrough]];
3661
3662
3664 return 2 * LT.first;
3665
3667 Op2Info);
3669
3670
3674 Op2Info);
3675 }
3676}
3677
3681
3682
3683
3684
3686 int MaxMergeDistance = 64;
3687
3690 return NumVectorInstToHideOverhead;
3691
3692
3693
3694 return 1;
3695}
3696
3701
3704 Op1Info, Op2Info, I);
3705
3707
3708
3709 if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
3710
3711 const int AmortizationCost = 20;
3712
3713
3714
3719 VecPred = CurrentPred;
3720 }
3721
3722
3727 static const auto ValidMinMaxTys = {
3728 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
3729 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
3730 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
3731
3733 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
3734 (ST->hasFullFP16() &&
3735 any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
3736 return LT.first;
3737 }
3738
3740 VectorSelectTbl[] = {
3741 { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
3742 { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
3743 { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
3744 { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
3745 { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
3746 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
3747 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
3748 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
3749 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
3750 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
3751 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
3752 };
3753
3760 return Entry->Cost;
3761 }
3762 }
3763
3764 if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
3766
3767 if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
3768 return LT.first * 4;
3769 }
3770
3771
3772
3773
3779 return 0;
3780
3781
3782
3784 Op1Info, Op2Info, I);
3785}
3786
3790 if (ST->requiresStrictAlign()) {
3791
3792
3794 }
3795 Options.AllowOverlappingLoads = true;
3798
3799
3800
3801 Options.LoadSizes = {8, 4, 2, 1};
3802 Options.AllowedTailExpansions = {3, 5, 6};
3804}
3805
3807 return ST->hasSVE();
3808}
3809
3818 if (!LT.first.isValid())
3820
3821
3822 auto *VT = cast<VectorType>(Src);
3823 if (VT->getElementType()->isIntegerTy(1))
3825
3826
3827
3828
3829
3832
3833 return LT.first;
3834}
3835
3836
3837
3840 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
3841 "Should be called on only load or stores.");
3842 switch (Opcode) {
3843 case Instruction::Load:
3846 return ST->getGatherOverhead();
3847 break;
3848 case Instruction::Store:
3851 return ST->getScatterOverhead();
3852 break;
3853 default:
3855 }
3856}
3857
3859 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
3864 auto *VT = cast<VectorType>(DataTy);
3866 if (!LT.first.isValid())
3868
3869
3870 if (!LT.second.isVector() ||
3872 VT->getElementType()->isIntegerTy(1))
3874
3875
3876
3877
3878
3881
3882 ElementCount LegalVF = LT.second.getVectorElementCount();
3885 {TTI::OK_AnyValue, TTI::OP_None}, I);
3886
3889}
3890
3893}
3894
3902
3903 if (VT == MVT::Other)
3906
3908 if (!LT.first.isValid())
3910
3911
3912
3913
3914
3915
3916 if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3918 (VTy->getElementType()->isIntegerTy(1) &&
3919 !VTy->getElementCount().isKnownMultipleOf(
3922
3923
3925 return LT.first;
3926
3928 return 1;
3929
3930 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
3931 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
3932
3933
3934
3935
3936
3937 const int AmortizationCost = 6;
3938
3939 return LT.first * 2 * AmortizationCost;
3940 }
3941
3942
3944 return LT.first;
3945
3947
3949
3950 if (VT == MVT::v4i8)
3951 return 2;
3952
3953 return cast<FixedVectorType>(Ty)->getNumElements() * 2;
3954 }
3957 if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
3959 *Alignment != Align(1))
3960 return LT.first;
3961
3962
3964 return LT.first;
3965
3966
3967
3968
3973 while (!TypeWorklist.empty()) {
3978 continue;
3979 }
3980
3981 unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
3985 }
3986 return Cost;
3987 }
3988
3989 return LT.first;
3990}
3991
3995 bool UseMaskForCond, bool UseMaskForGaps) {
3996 assert(Factor >= 2 && "Invalid interleave factor");
3997 auto *VecVTy = cast<VectorType>(VecTy);
3998
4001
4002
4003
4004 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4006
4007 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4008 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
4009 auto *SubVecTy =
4011 VecVTy->getElementCount().divideCoefficientBy(Factor));
4012
4013
4014
4015
4016 bool UseScalable;
4017 if (MinElts % Factor == 0 &&
4020 }
4021
4024 UseMaskForCond, UseMaskForGaps);
4025}
4026
4031 for (auto *I : Tys) {
4032 if (!I->isVectorTy())
4033 continue;
4034 if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
4035 128)
4038 }
4039 return Cost;
4040}
4041
4044}
4045
4046
4047
4048
4049
4050static void
4053 enum { MaxStridedLoads = 7 };
4055 int StridedLoads = 0;
4056
4057
4058 for (const auto BB : L->blocks()) {
4059 for (auto &I : *BB) {
4060 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
4061 if (!LMemI)
4062 continue;
4063
4065 if (L->isLoopInvariant(PtrValue))
4066 continue;
4067
4068 const SCEV *LSCEV = SE.getSCEV(PtrValue);
4069 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
4070 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
4071 continue;
4072
4073
4074
4075
4076 ++StridedLoads;
4077
4078
4079 if (StridedLoads > MaxStridedLoads / 2)
4080 return StridedLoads;
4081 }
4082 }
4083 return StridedLoads;
4084 };
4085
4086 int StridedLoads = countStridedLoads(L, SE);
4087 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
4088 << " strided loads\n");
4089
4090
4091 if (StridedLoads) {
4092 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
4093 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
4095 }
4096}
4097
4098
4099
4100static void
4104
4105
4106
4107
4108
4109
4110 if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
4111 return;
4112
4114 if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
4117 return;
4119 return;
4120
4121 int64_t Size = 0;
4122 for (auto *BB : L->getBlocks()) {
4123 for (auto &I : *BB) {
4125 return;
4129 }
4130 }
4131
4132
4134
4135
4136
4137 BasicBlock *Header = L->getHeader();
4138 if (Header == L->getLoopLatch()) {
4139 if (Size > 8)
4140 return;
4141
4144 for (auto *BB : L->blocks()) {
4145 for (auto &I : *BB) {
4147 if ()
4148 continue;
4151 continue;
4152 if (isa<LoadInst>(&I))
4154 else
4155 Stores.push_back(cast<StoreInst>(&I));
4156 }
4157 }
4158
4159
4160
4161 unsigned MaxInstsPerLine = 16;
4162 unsigned UC = 1;
4163 unsigned BestUC = 1;
4164 unsigned SizeWithBestUC = BestUC * Size;
4165 while (UC <= 8) {
4166 unsigned SizeWithUC = UC * Size;
4167 if (SizeWithUC > 48)
4168 break;
4169 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4170 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
4171 BestUC = UC;
4172 SizeWithBestUC = BestUC * Size;
4173 }
4174 UC++;
4175 }
4176
4177 if (BestUC == 1 || none_of(Stores, [&LoadedValues](StoreInst *SI) {
4178 return LoadedValues.contains(SI->getOperand(0));
4179 }))
4180 return;
4181
4184 return;
4185 }
4186
4187
4188
4189 auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
4190 auto *Latch = L->getLoopLatch();
4192 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
4193 none_of(Preds, [Header](BasicBlock *Pred) { return Header == Pred; }) ||
4194 none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))
4195 return;
4196
4197 std::function<bool(Instruction *, unsigned)> DependsOnLoopLoad =
4199 if (isa<PHINode>(I) || L->isLoopInvariant(I) || Depth > 8)
4200 return false;
4201
4202 if (isa<LoadInst>(I))
4203 return true;
4204
4205 return any_of(I->operands(), [&](Value *V) {
4206 auto *I = dyn_cast<Instruction>(V);
4207 return I && DependsOnLoopLoad(I, Depth + 1);
4208 });
4209 };
4214 DependsOnLoopLoad(I, 0)) {
4216 }
4217}
4218
4222
4224
4226
4227
4228
4229
4230 if (L->getLoopDepth() > 1)
4232
4233
4235
4236
4238 case AArch64Subtarget::AppleA14:
4239 case AArch64Subtarget::AppleA15:
4240 case AArch64Subtarget::AppleA16:
4241 case AArch64Subtarget::AppleM4:
4243 break;
4244 case AArch64Subtarget::Falkor:
4247 break;
4248 default:
4249 break;
4250 }
4251
4252
4253
4254
4255 for (auto *BB : L->getBlocks()) {
4256 for (auto &I : *BB) {
4257
4258 if (I.getType()->isVectorTy())
4259 return;
4260
4264 continue;
4265 }
4266 return;
4267 }
4268 }
4269 }
4270
4271
4272
4273
4274
4276 !ST->getSchedModel().isOutOfOrder()) {
4281
4284 }
4285}
4286
4290}
4291
4293 Type *ExpectedType) {
4295 default:
4296 return nullptr;
4297 case Intrinsic::aarch64_neon_st2:
4298 case Intrinsic::aarch64_neon_st3:
4299 case Intrinsic::aarch64_neon_st4: {
4300
4301 StructType *ST = dyn_cast<StructType>(ExpectedType);
4302 if (!ST)
4303 return nullptr;
4304 unsigned NumElts = Inst->arg_size() - 1;
4305 if (ST->getNumElements() != NumElts)
4306 return nullptr;
4307 for (unsigned i = 0, e = NumElts; i != e; ++i) {
4309 return nullptr;
4310 }
4313 for (unsigned i = 0, e = NumElts; i != e; ++i) {
4316 }
4317 return Res;
4318 }
4319 case Intrinsic::aarch64_neon_ld2:
4320 case Intrinsic::aarch64_neon_ld3:
4321 case Intrinsic::aarch64_neon_ld4:
4322 if (Inst->getType() == ExpectedType)
4323 return Inst;
4324 return nullptr;
4325 }
4326}
4327
4331 default:
4332 break;
4333 case Intrinsic::aarch64_neon_ld2:
4334 case Intrinsic::aarch64_neon_ld3:
4335 case Intrinsic::aarch64_neon_ld4:
4336 Info.ReadMem = true;
4337 Info.WriteMem = false;
4339 break;
4340 case Intrinsic::aarch64_neon_st2:
4341 case Intrinsic::aarch64_neon_st3:
4342 case Intrinsic::aarch64_neon_st4:
4343 Info.ReadMem = false;
4344 Info.WriteMem = true;
4346 break;
4347 }
4348
4350 default:
4351 return false;
4352 case Intrinsic::aarch64_neon_ld2:
4353 case Intrinsic::aarch64_neon_st2:
4354 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
4355 break;
4356 case Intrinsic::aarch64_neon_ld3:
4357 case Intrinsic::aarch64_neon_st3:
4358 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
4359 break;
4360 case Intrinsic::aarch64_neon_ld4:
4361 case Intrinsic::aarch64_neon_st4:
4362 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
4363 break;
4364 }
4365 return true;
4366}
4367
4368
4369
4370
4371
4372
4374 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
4375 bool Considerable = false;
4376 AllowPromotionWithoutCommonHeader = false;
4377 if (!isa<SExtInst>(&I))
4378 return false;
4379 Type *ConsideredSExtType =
4381 if (I.getType() != ConsideredSExtType)
4382 return false;
4383
4384
4385 for (const User *U : I.users()) {
4386 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
4387 Considerable = true;
4388
4389
4390
4391 if (GEPInst->getNumOperands() > 2) {
4392 AllowPromotionWithoutCommonHeader = true;
4393 break;
4394 }
4395 }
4396 }
4397 return Considerable;
4398}
4399
4403 return true;
4404
4407 return false;
4408
4424 return true;
4425 default:
4426 return false;
4427 }
4428}
4429
4434
4435
4436
4437
4438 if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
4441
4443
4444 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
4446
4448 if (LT.first > 1) {
4452 }
4453
4454 return LegalizationCost + 2;
4455}
4456
4461 if (LT.first > 1) {
4464 LegalizationCost *= LT.first - 1;
4465 }
4466
4468 assert(ISD && "Invalid opcode");
4469
4470 switch (ISD) {
4476 return LegalizationCost + 2;
4477 default:
4479 }
4480}
4481
4484 std::optional FMF,
4486
4487
4488
4489
4490 if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
4493
4495 if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
4498
4499
4500 return BaseCost + FixedVTy->getNumElements();
4501 }
4502
4503 if (Opcode != Instruction::FAdd)
4505
4506 auto *VTy = cast<ScalableVectorType>(ValTy);
4510 return Cost;
4511 }
4512
4513 if (isa<ScalableVectorType>(ValTy))
4515
4517 MVT MTy = LT.second;
4519 assert(ISD && "Invalid opcode");
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529 static const CostTblEntry CostTblNoPairwise[]{
4536 {ISD::OR, MVT::v8i8, 15},
4537 {ISD::OR, MVT::v16i8, 17},
4538 {ISD::OR, MVT::v4i16, 7},
4539 {ISD::OR, MVT::v8i16, 9},
4540 {ISD::OR, MVT::v2i32, 3},
4541 {ISD::OR, MVT::v4i32, 5},
4542 {ISD::OR, MVT::v2i64, 3},
4557 };
4558 switch (ISD) {
4559 default:
4560 break;
4563
4564
4565 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
4566 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
4570
4571
4572
4573
4574
4575
4576
4577
4578 return (LT.first - 1) + Log2_32(NElts);
4579 }
4580 break;
4582 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
4583 return (LT.first - 1) + Entry->Cost;
4584 break;
4588 const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy);
4589 if (!Entry)
4590 break;
4591 auto *ValVTy = cast<FixedVectorType>(ValTy);
4595 if (LT.first != 1) {
4596
4597
4601 ExtraCost *= LT.first - 1;
4602 }
4603
4604 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
4605 return Cost + ExtraCost;
4606 }
4607 break;
4608 }
4610}
4611
4627 };
4628
4629
4630
4631
4632
4635
4639 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
4641 : LT.second;
4644 if (Index < 0) {
4645 LegalizationCost =
4650 }
4651
4652
4653
4654 if (LT.second.getScalarType() == MVT::i1) {
4655 LegalizationCost +=
4660 }
4661 const auto *Entry =
4663 assert(Entry && "Illegal Type for Splice");
4664 LegalizationCost += Entry->Cost;
4665 return LegalizationCost * LT.first;
4666}
4667
4669 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
4672 std::optional BinOp) const {
4675
4676 if (Opcode != Instruction::Add)
4678
4679 if (InputTypeA != InputTypeB)
4681
4684
4689
4690 if (InputEVT == MVT::i8) {
4692 default:
4694 case 8:
4695 if (AccumEVT == MVT::i32)
4697 else if (AccumEVT != MVT::i64)
4699 break;
4700 case 16:
4701 if (AccumEVT == MVT::i64)
4703 else if (AccumEVT != MVT::i32)
4705 break;
4706 }
4707 } else if (InputEVT == MVT::i16) {
4708
4709
4712 } else
4714
4715
4716
4718 (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&
4721
4722 if (!BinOp || *BinOp != Instruction::Mul)
4724
4725 return Cost;
4726}
4727
4733
4734
4735
4736 if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
4738 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
4739
4740
4741
4742
4743
4744 if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
4747 return std::max(1, LT.first / 4);
4748
4749
4750
4751
4752
4758 return LT.first;
4759
4760 unsigned TpNumElts = Mask.size();
4761 unsigned LTNumElts = LT.second.getVectorNumElements();
4762 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
4766 for (unsigned N = 0; N < NumVecs; N++) {
4768
4769
4770 unsigned Source1, Source2;
4771 unsigned NumSources = 0;
4772 for (unsigned E = 0; E < LTNumElts; E++) {
4773 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
4775 if (MaskElt < 0) {
4777 continue;
4778 }
4779
4780
4781
4782 unsigned Source = MaskElt / LTNumElts;
4783 if (NumSources == 0) {
4784 Source1 = Source;
4785 NumSources = 1;
4786 } else if (NumSources == 1 && Source != Source1) {
4787 Source2 = Source;
4788 NumSources = 2;
4789 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
4790 NumSources++;
4791 }
4792
4793
4794
4795 if (Source == Source1)
4796 NMask.push_back(MaskElt % LTNumElts);
4797 else if (Source == Source2)
4798 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
4799 else
4800 NMask.push_back(MaskElt % LTNumElts);
4801 }
4802
4803
4804
4805 if (NumSources <= 2)
4808 NTp, NMask, CostKind, 0, nullptr, Args, CxtI);
4809 else
4810 Cost += LTNumElts;
4811 }
4812 return Cost;
4813 }
4814
4817
4818
4819
4820 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
4821 if (LT.second.is128BitVector() &&
4822 cast<FixedVectorType>(SubTp)->getNumElements() ==
4823 LT.second.getVectorNumElements() / 2) {
4824 if (Index == 0)
4825 return 0;
4826 if (Index == (int)LT.second.getVectorNumElements() / 2)
4827 return 1;
4828 }
4830 }
4831
4832
4833
4834
4835
4836
4837
4838
4840 bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
4841 if (IsLoad && LT.second.isVector() &&
4843 LT.second.getVectorElementCount()))
4844 return 0;
4845 }
4846
4847
4848
4851 all_of(Mask, [](int E) { return E < 8; }))
4853
4854
4855 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
4858 return M.value() < 0 || M.value() == (int)M.index();
4859 }))
4860 return 0;
4861
4862
4863
4864 unsigned Unused;
4865 if (LT.second.isFixedLengthVector() &&
4866 LT.second.getVectorNumElements() == Mask.size() &&
4868 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4869 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4870
4872 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
4873 return 1;
4874
4879
4892
4893
4906
4907
4909 {TTI::SK_Select, MVT::v4i32, 2},
4912 {TTI::SK_Select, MVT::v4f32, 2},
4914
4929
4942
4957
4975
4993 };
4994 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
4995 return LT.first * Entry->Cost;
4996 }
4997
4998 if (Kind == TTI::SK_Splice && isa<ScalableVectorType>(Tp))
5000
5001
5002
5004 LT.second.getSizeInBits() <= 128 && SubTp) {
5006 if (SubLT.second.isVector()) {
5007 int NumElts = LT.second.getVectorNumElements();
5008 int NumSubElts = SubLT.second.getVectorNumElements();
5009 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
5010 return SubLT.first;
5011 }
5012 }
5013
5014
5015 if (IsExtractSubvector)
5018 CxtI);
5019}
5020
5025
5026
5028 if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
5031 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, Strides, true,
5032 false)
5033 .value_or(0) < 0)
5034 return true;
5035 }
5036 }
5037 }
5038 return false;
5039}
5040
5044 return ST->useFixedOverScalableIfEqualCost();
5045}
5046
5049}
5050
5052 if (!ST->hasSVE())
5053 return false;
5054
5055
5056
5057
5059 return false;
5060
5066
5067
5068
5069
5075
5077 Required))
5078 return false;
5079
5080
5081
5082 unsigned NumInsns = 0;
5084 NumInsns += BB->sizeWithoutDebug();
5085 }
5086
5087
5089}
5090
5093 StackOffset BaseOffset, bool HasBaseReg,
5094 int64_t Scale, unsigned AddrSpace) const {
5095
5096
5097
5098
5099
5100
5101
5106 AM.Scale = Scale;
5109
5110
5111 return AM.Scale != 0 && AM.Scale != 1;
5112 return -1;
5113}
5114
5117
5118
5119
5120
5121 if (I->getOpcode() == Instruction::Or &&
5122 isa<BranchInst>(I->getNextNode()) &&
5123 cast<BranchInst>(I->getNextNode())->isUnconditional())
5124 return true;
5125
5126 if (I->getOpcode() == Instruction::Add ||
5127 I->getOpcode() == Instruction::Sub)
5128 return true;
5129 }
5131}
5132
5135
5136
5137
5138
5139
5145
5147}
5148
5150 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
5151 return all_equal(Shuf->getShuffleMask());
5152 return false;
5153}
5154
5155
5156
5158 bool AllowSplat = false) {
5159
5161 return false;
5162
5163 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
5164 auto *FullTy = FullV->getType();
5165 auto *HalfTy = HalfV->getType();
5167 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
5168 };
5169
5170 auto extractHalf = [](Value *FullV, Value *HalfV) {
5171 auto *FullVT = cast<FixedVectorType>(FullV->getType());
5172 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
5173 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
5174 };
5175
5177 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
5180 return false;
5181
5182
5183
5185 S1Op1 = nullptr;
5187 S2Op1 = nullptr;
5188
5189
5190
5191 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
5192 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
5193 return false;
5194
5195
5196
5197 int M1Start = 0;
5198 int M2Start = 0;
5199 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
5200 if ((S1Op1 &&
5202 (S2Op1 &&
5204 return false;
5205
5206 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
5207 (M2Start != 0 && M2Start != (NumElements / 2)))
5208 return false;
5209 if (S1Op1 && S2Op1 && M1Start != M2Start)
5210 return false;
5211
5212 return true;
5213}
5214
5215
5216
5218 auto areExtDoubled = [](Instruction *Ext) {
5219 return Ext->getType()->getScalarSizeInBits() ==
5220 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
5221 };
5222
5225 !areExtDoubled(cast<Instruction>(Ext1)) ||
5226 !areExtDoubled(cast<Instruction>(Ext2)))
5227 return false;
5228
5229 return true;
5230}
5231
5232
5234 Value *VectorOperand = nullptr;
5238 ElementIndex->getValue() == 1 &&
5239 isa<FixedVectorType>(VectorOperand->getType()) &&
5240 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
5241}
5242
5243
5246}
5247
5249
5250 auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
5251 if (!GEP || GEP->getNumOperands() != 2)
5252 return false;
5253
5255 Value *Offsets = GEP->getOperand(1);
5256
5257
5258 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
5259 return false;
5260
5261
5262 if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
5263 auto *OffsetsInst = cast<Instruction>(Offsets);
5264 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
5265 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
5267 }
5268
5269
5270 return true;
5271}
5272
5273
5274
5275
5278 return true;
5281 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5282 return true;
5283 }
5286 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
5287 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
5288 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5289 return true;
5290 }
5291 return false;
5292}
5293
5294
5295
5296
5300 switch (II->getIntrinsicID()) {
5301 case Intrinsic::aarch64_neon_smull:
5302 case Intrinsic::aarch64_neon_umull:
5304 true)) {
5307 return true;
5308 }
5309 [[fallthrough]];
5310
5311 case Intrinsic::fma:
5312 case Intrinsic::fmuladd:
5313 if (isa<VectorType>(I->getType()) &&
5314 cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5315 !ST->hasFullFP16())
5316 return false;
5317 [[fallthrough]];
5318 case Intrinsic::aarch64_neon_sqdmull:
5319 case Intrinsic::aarch64_neon_sqdmulh:
5320 case Intrinsic::aarch64_neon_sqrdmulh:
5321
5326 return !Ops.empty();
5327 case Intrinsic::aarch64_neon_fmlal:
5328 case Intrinsic::aarch64_neon_fmlal2:
5329 case Intrinsic::aarch64_neon_fmlsl:
5330 case Intrinsic::aarch64_neon_fmlsl2:
5331
5336 return !Ops.empty();
5337 case Intrinsic::aarch64_sve_ptest_first:
5338 case Intrinsic::aarch64_sve_ptest_last:
5339 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
5340 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
5342 return !Ops.empty();
5343 case Intrinsic::aarch64_sme_write_horiz:
5344 case Intrinsic::aarch64_sme_write_vert:
5345 case Intrinsic::aarch64_sme_writeq_horiz:
5346 case Intrinsic::aarch64_sme_writeq_vert: {
5347 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
5348 if (!Idx || Idx->getOpcode() != Instruction::Add)
5349 return false;
5351 return true;
5352 }
5353 case Intrinsic::aarch64_sme_read_horiz:
5354 case Intrinsic::aarch64_sme_read_vert:
5355 case Intrinsic::aarch64_sme_readq_horiz:
5356 case Intrinsic::aarch64_sme_readq_vert:
5357 case Intrinsic::aarch64_sme_ld1b_vert:
5358 case Intrinsic::aarch64_sme_ld1h_vert:
5359 case Intrinsic::aarch64_sme_ld1w_vert:
5360 case Intrinsic::aarch64_sme_ld1d_vert:
5361 case Intrinsic::aarch64_sme_ld1q_vert:
5362 case Intrinsic::aarch64_sme_st1b_vert:
5363 case Intrinsic::aarch64_sme_st1h_vert:
5364 case Intrinsic::aarch64_sme_st1w_vert:
5365 case Intrinsic::aarch64_sme_st1d_vert:
5366 case Intrinsic::aarch64_sme_st1q_vert:
5367 case Intrinsic::aarch64_sme_ld1b_horiz:
5368 case Intrinsic::aarch64_sme_ld1h_horiz:
5369 case Intrinsic::aarch64_sme_ld1w_horiz:
5370 case Intrinsic::aarch64_sme_ld1d_horiz:
5371 case Intrinsic::aarch64_sme_ld1q_horiz:
5372 case Intrinsic::aarch64_sme_st1b_horiz:
5373 case Intrinsic::aarch64_sme_st1h_horiz:
5374 case Intrinsic::aarch64_sme_st1w_horiz:
5375 case Intrinsic::aarch64_sme_st1d_horiz:
5376 case Intrinsic::aarch64_sme_st1q_horiz: {
5377 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
5378 if (!Idx || Idx->getOpcode() != Instruction::Add)
5379 return false;
5381 return true;
5382 }
5383 case Intrinsic::aarch64_neon_pmull:
5385 return false;
5388 return true;
5389 case Intrinsic::aarch64_neon_pmull64:
5391 II->getArgOperand(1)))
5392 return false;
5393 Ops.push_back(&II->getArgOperandUse(0));
5394 Ops.push_back(&II->getArgOperandUse(1));
5395 return true;
5396 case Intrinsic::masked_gather:
5398 return false;
5399 Ops.push_back(&II->getArgOperandUse(0));
5400 return true;
5401 case Intrinsic::masked_scatter:
5403 return false;
5404 Ops.push_back(&II->getArgOperandUse(1));
5405 return true;
5406 default:
5407 return false;
5408 }
5409 }
5410
5411 auto ShouldSinkCondition = [](Value *Cond) -> bool {
5412 auto *II = dyn_cast<IntrinsicInst>(Cond);
5413 return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
5414 isa<ScalableVectorType>(II->getOperand(0)->getType());
5415 };
5416
5417 switch (I->getOpcode()) {
5418 case Instruction::GetElementPtr:
5419 case Instruction::Add:
5420 case Instruction::Sub:
5421
5422 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
5425 return true;
5426 }
5427 }
5428 break;
5429 case Instruction::Select: {
5430 if (!ShouldSinkCondition(I->getOperand(0)))
5431 return false;
5432
5433 Ops.push_back(&I->getOperandUse(0));
5434 return true;
5435 }
5436 case Instruction::Br: {
5437 if (cast<BranchInst>(I)->isUnconditional())
5438 return false;
5439
5440 if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
5441 return false;
5442
5443 Ops.push_back(&I->getOperandUse(0));
5444 return true;
5445 }
5446 default:
5447 break;
5448 }
5449
5450 if (!I->getType()->isVectorTy())
5451 return false;
5452
5453 switch (I->getOpcode()) {
5454 case Instruction::Sub:
5455 case Instruction::Add: {
5457 return false;
5458
5459
5460
5461 auto Ext1 = cast<Instruction>(I->getOperand(0));
5462 auto Ext2 = cast<Instruction>(I->getOperand(1));
5464 Ops.push_back(&Ext1->getOperandUse(0));
5465 Ops.push_back(&Ext2->getOperandUse(0));
5466 }
5467
5468 Ops.push_back(&I->getOperandUse(0));
5469 Ops.push_back(&I->getOperandUse(1));
5470
5471 return true;
5472 }
5473 case Instruction::Or: {
5474
5475
5476 if (ST->hasNEON()) {
5478 Value *MaskValue;
5479
5483 if (match(OtherAnd,
5485 Instruction *MainAnd = I->getOperand(0) == OtherAnd
5486 ? cast<Instruction>(I->getOperand(1))
5487 : cast<Instruction>(I->getOperand(0));
5488
5489
5490 if (I->getParent() != MainAnd->getParent() ||
5491 I->getParent() != OtherAnd->getParent())
5492 return false;
5493
5494
5495 if (I->getParent() != IA->getParent() ||
5496 I->getParent() != IB->getParent())
5497 return false;
5498
5501 Ops.push_back(&I->getOperandUse(0));
5502 Ops.push_back(&I->getOperandUse(1));
5503
5504 return true;
5505 }
5506 }
5507 }
5508
5509 return false;
5510 }
5511 case Instruction::Mul: {
5512 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5513 auto *Ty = cast<VectorType>(V->getType());
5514
5515 if (Ty->isScalableTy())
5516 return false;
5517
5518
5519 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
5520 };
5521
5522 int NumZExts = 0, NumSExts = 0;
5523 for (auto &Op : I->operands()) {
5524
5525 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
5526 continue;
5527
5529 auto *Ext = cast<Instruction>(Op);
5530 auto *ExtOp = Ext->getOperand(0);
5531 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
5532 Ops.push_back(&Ext->getOperandUse(0));
5534
5535 if (isa<SExtInst>(Ext))
5536 NumSExts++;
5537 else
5538 NumZExts++;
5539
5540 continue;
5541 }
5542
5544 if (!Shuffle)
5545 continue;
5546
5547
5548
5549
5555 NumSExts++;
5556 else
5557 NumZExts++;
5558 continue;
5559 }
5560
5562 InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
5563 if (!Insert)
5564 continue;
5565
5566 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
5567 if (!OperandInstr)
5568 continue;
5569
5571 dyn_cast<ConstantInt>(Insert->getOperand(2));
5572
5573 if (!ElementConstant || !ElementConstant->isZero())
5574 continue;
5575
5576 unsigned Opcode = OperandInstr->getOpcode();
5577 if (Opcode == Instruction::SExt)
5578 NumSExts++;
5579 else if (Opcode == Instruction::ZExt)
5580 NumZExts++;
5581 else {
5582
5583
5584 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
5588 continue;
5589 NumZExts++;
5590 }
5591
5592
5593
5595 Ops.push_back(&Insert->getOperandUse(1));
5598 }
5599
5600
5601 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
5602 return true;
5603
5604
5605 if (!ShouldSinkSplatForIndexedVariant(I))
5606 return false;
5607
5610 Ops.push_back(&I->getOperandUse(0));
5612 Ops.push_back(&I->getOperandUse(1));
5613
5614 return !Ops.empty();
5615 }
5616 case Instruction::FMul: {
5617
5618 if (I->getType()->isScalableTy())
5619 return false;
5620
5621 if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5622 !ST->hasFullFP16())
5623 return false;
5624
5625
5627 Ops.push_back(&I->getOperandUse(0));
5629 Ops.push_back(&I->getOperandUse(1));
5630 return !Ops.empty();
5631 }
5632 default:
5633 return false;
5634 }
5635 return false;
5636}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static std::optional< Instruction * > instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID)
TailFoldingOption TailFoldingOptionLoc
static std::optional< Instruction * > instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, bool MergeIntoAddendOp)
static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)
bool SimplifyValuePattern(SmallVector< Value * > &Vec, bool AllowPoison)
static std::optional< Instruction * > instCombineSVESel(InstCombiner &IC, IntrinsicInst &II)
static bool shouldSinkVScale(Value *Op, SmallVectorImpl< Use * > &Ops)
We want to sink the following cases: (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A,...
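A minimal sketch (not code from this file) of how operands of that shape are typically recognised with llvm/IR/PatternMatch.h; the helper name looksLikeVScaleOffset is illustrative only:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// True for vscale itself and for (mul|shl) vscale, constant -- the offset
// shapes listed in the description above.
static bool looksLikeVScaleOffset(Value *Op) {
  return match(Op, m_VScale()) ||
         match(Op, m_Shl(m_VScale(), m_ConstantInt())) ||
         match(Op, m_Mul(m_VScale(), m_ConstantInt()));
}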
static std::optional< Instruction * > tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEUnpack(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< unsigned > SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold", cl::init(15), cl::Hidden)
static cl::opt< bool > EnableFixedwidthAutovecInStreamingMode("enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden)
static std::optional< Instruction * > instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID)
static std::optional< Instruction * > instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
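For illustration only (an assumed example, not taken from this file), a pair that passes this check looks like the fragment below: both sign-extends double i8 lanes to i16 lanes, so a following add can later be selected as a widening NEON instruction such as saddl.

//   %a = sext <8 x i8> %x to <8 x i16>
//   %b = sext <8 x i8> %y to <8 x i16>
//   %r = add <8 x i16> %a, %b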
static cl::opt< bool > EnableLSRCostOpt("enable-aarch64-lsr-cost-opt", cl::init(true), cl::Hidden)
static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl< Use * > &Ops)
static std::optional< Instruction * > instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > processPhiNode(InstCombiner &IC, IntrinsicInst &II)
The function will remove redundant reinterpret casts in the presence of control flow.
static std::optional< Instruction * > instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II, int PredPos)
static std::optional< Instruction * > instCombineSVEInsr(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVESDIV(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)
static bool containsDecreasingPointers(Loop *TheLoop, PredicatedScalarEvolution *PSE)
static std::optional< Instruction * > instCombineSVEAllActive(IntrinsicInst &II, Intrinsic::ID IID)
static std::optional< Instruction * > instCombineSVENoActiveZero(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< bool > SVEPreferFixedOverScalableIfEqualCost("sve-prefer-fixed-over-scalable-if-equal", cl::Hidden)
static bool isUnpackedVectorVT(EVT VecVT)
static std::optional< Instruction * > instCombineSVEDupX(InstCombiner &IC, IntrinsicInst &II)
static void getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP, AArch64TTIImpl &TTI)
For Apple CPUs, we want to runtime-unroll loops to make better use of the OOO engine's wide instructi...
static std::optional< Instruction * > instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineDMB(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineMaxMinNM(InstCombiner &IC, IntrinsicInst &II)
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA)
static cl::opt< unsigned > SVEGatherOverhead("sve-gather-overhead", cl::init(10), cl::Hidden)
static std::optional< Instruction * > instCombineSVECondLast(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEZip(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< unsigned > BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden, cl::desc("The cost of a histcnt instruction"))
static std::optional< Instruction * > instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< unsigned > CallPenaltyChangeSM("call-penalty-sm-change", cl::init(5), cl::Hidden, cl::desc("Penalty of calling a function that requires a change to PSTATE.SM"))
static std::optional< Instruction * > instCombineSVEUzp1(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< bool > EnableScalableAutovecInStreamingMode("enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden)
static std::optional< Instruction * > instCombineSVETBL(InstCombiner &IC, IntrinsicInst &II)
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)
Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic)
static bool isSplatShuffle(Value *V)
static cl::opt< unsigned > InlineCallPenaltyChangeSM("inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"))
static std::optional< Instruction * > instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)
static std::optional< Instruction * > instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II, bool hasInactiveVector)
static std::optional< Instruction * > instCombineSVESrshl(InstCombiner &IC, IntrinsicInst &II)
static bool isSMEABIRoutineCall(const CallInst &CI)
static cl::opt< unsigned > DMBLookaheadThreshold("dmb-lookahead-threshold", cl::init(10), cl::Hidden, cl::desc("The number of instructions to search for a redundant dmb"))
static unsigned getSVEGatherScatterOverhead(unsigned Opcode, const AArch64Subtarget *ST)
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
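As a rough, hedged sketch of the shape being checked (not the exact implementation in this file; the helper name is made up), a candidate operand is an extract of lane 1 from a two-element 64-bit vector:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: operands suitable for the "high" variant look like
// extractelement <2 x i64> %v, i64 1.
static bool isLane1ExtractOfV2I64(Value *Op) {
  auto *EE = dyn_cast<ExtractElementInst>(Op);
  if (!EE)
    return false;
  auto *Idx = dyn_cast<ConstantInt>(EE->getIndexOperand());
  auto *VTy = dyn_cast<FixedVectorType>(EE->getVectorOperandType());
  return Idx && Idx->equalsInt(1) && VTy && VTy->getNumElements() == 2 &&
         VTy->getElementType()->isIntegerTy(64);
}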
static std::optional< Instruction * > instCombineSVELast(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< unsigned > NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10), cl::Hidden)
static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)
static std::optional< Instruction * > instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts)
static bool hasPossibleIncompatibleOps(const Function *F)
Returns true if the function has explicit operations that can only be lowered using incompatible inst...
cl::opt< TailFoldingOption, true, cl::parser< std::string > > SVETailFolding("sve-tail-folding", cl::desc("Control the use of vectorisation using tail-folding for SVE where the" " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" "\ndisabled (Initial) No loop types will vectorize using " "tail-folding" "\ndefault (Initial) Uses the default tail-folding settings for " "the target CPU" "\nall (Initial) All legal loop types will vectorize using " "tail-folding" "\nsimple (Initial) Use tail-folding for simple loops (not " "reductions or recurrences)" "\nreductions Use tail-folding for loops containing reductions" "\nnoreductions Inverse of above" "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" "\nnorecurrences Inverse of above" "\nreverse Use tail-folding for loops requiring reversed " "predicates" "\nnoreverse Inverse of above"), cl::location(TailFoldingOptionLoc))
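Usage note: under the grammar documented above, an invocation such as "-sve-tail-folding=default+noreverse" starts from the target CPU's default tail-folding settings and then disables tail-folding for loops requiring reversed predicates; when driving LLVM through clang this would typically be spelled "-mllvm -sve-tail-folding=default+noreverse" (the driver spelling is incidental, the option grammar is as documented above).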
static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
static std::optional< Instruction * > instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II)
static cl::opt< bool > EnableOrLikeSelectOpt("enable-aarch64-or-like-select", cl::init(true), cl::Hidden)
static cl::opt< unsigned > SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden)
static std::optional< Instruction * > instCombineSVEDupqLane(InstCombiner &IC, IntrinsicInst &II)
This file defines a TargetTransformInfo::Concept conforming object specific to the AArch64 target machine.
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getEpilogueVectorizationMinVF() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
unsigned getEpilogueVectorizationMinVF() const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
uint64_t getFeatureMask(const Function &F) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool preferFixedOverScalableIfEqualCost() const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool enableScalableVectorization() const
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isMultiversionedFunction(const Function &F) const
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
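The cost hooks listed above are normally reached through the TargetTransformInfo wrapper; a hedged sketch of a client-side query (the helper name and the choice of a <4 x i32> add are assumptions, and the TTI reference is obtained from the pass manager by the caller):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Query the reciprocal-throughput cost of a <4 x i32> add.
static InstructionCost vectorAddCost(const TargetTransformInfo &TTI,
                                     LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  return TTI.getArithmeticInstrCost(Instruction::Add, VecTy,
                                    TargetTransformInfo::TCK_RecipThroughput);
}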
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isIntPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
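A short sketch, not taken from this file, showing several of the IRBuilder calls listed above used together to emit a masked load whose inactive lanes are zero; BB, Ptr and Mask (expected to be a <4 x i1> value) are assumed to be supplied by the caller:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit @llvm.masked.load for a <4 x i32> with a zero pass-through vector.
static Value *emitMaskedLoad(BasicBlock *BB, Value *Ptr, Value *Mask) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);
  auto *VecTy = FixedVectorType::get(Builder.getInt32Ty(), 4);
  Value *PassThru = Constant::getNullValue(VecTy);
  return Builder.CreateMaskedLoad(VecTy, Ptr, Align(16), Mask, PassThru,
                                  "masked.ld");
}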
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
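As a small illustration (the function name and element count are assumptions), the two ScalableVectorType factories above can be used to form <vscale x 4 x i32> and its double-width counterpart:

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

static ScalableVectorType *makeDoubledNxv4i32(LLVMContext &Ctx) {
  auto *NxV4I32 = ScalableVectorType::get(Type::getInt32Ty(Ctx), 4);
  // <vscale x 4 x i32> -> <vscale x 8 x i32>
  return ScalableVectorType::getDoubleElementsVectorType(NxV4I32);
}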
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
This instruction constructs a fixed permutation of two input vectors.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
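A hedged sketch of how the shuffle-mask queries above can be used to recognise an even-lane extract, i.e. a factor-2 de-interleave mask such as <0, 2, 4, 6>; the helper name is an assumption:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isEvenLaneExtract(ArrayRef<int> Mask) {
  unsigned Index;
  // A factor-2 de-interleave mask starting at lane 0 selects the even lanes.
  return ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, /*Factor=*/2,
                                                       Index) &&
         Index == 0;
}

For example, isEvenLaneExtract({0, 2, 4, 6}) returns true, while for {1, 3, 5, 7} the query matches with Index == 1, so the helper returns false.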
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
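A minimal sketch of the StringRef/StringSwitch pairing documented above: split a "key=value" option once and map the key through a switch on string literals; the keys and the helper name are made up for illustration:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int parseLevel(StringRef Opt) {
  // "aggressive=2" -> Key = "aggressive", Rest = "2" (Rest unused here).
  auto [Key, Rest] = Opt.split('=');
  (void)Rest;
  return StringSwitch<int>(Key)
      .Case("none", 0)
      .Case("basic", 1)
      .Case("aggressive", 2)
      .Default(-1);
}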
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
bool shouldTreatInstructionLikeSelect(const Instruction *I)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
bool isLoweredToCall(const Function *F) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
uint64_t getFMVPriority(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
VScaleVal_match m_VScale()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
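The matchers above compose into declarative patterns; a hedged sketch (helper name assumed) that recognises a widening multiply of the form mul (zext/sext X), (zext/sext Y):

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static bool isWideningMul(Value *V, Value *&X, Value *&Y) {
  // Binds X and Y to the narrow operands when the pattern matches.
  return match(V, m_Mul(m_ZExtOrSExt(m_Value(X)), m_ZExtOrSExt(m_Value(Y))));
}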
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
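Cost tables pair an ISD opcode with an MVT and a cost; a small sketch of a lookup (the table contents and helper name are invented for illustration, not taken from this file):

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

static InstructionCost lookupMulCost(MVT Ty) {
  static const CostTblEntry MulTbl[] = {
      {ISD::MUL, MVT::v4i32, 1}, // illustrative costs only
      {ISD::MUL, MVT::v2i64, 4},
  };
  if (const auto *Entry = CostTableLookup(MulTbl, ISD::MUL, Ty))
    return Entry->Cost;
  return InstructionCost::getInvalid();
}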
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
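A tiny worked example of the MathExtras helpers above (function name assumed): for Factor == 8, isPowerOf2_32(8) is true and Log2_32(8) == 3, so a multiply by 8 could be rewritten as a shift by 3.

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Returns the shift amount when Factor is a power of two, 0 otherwise.
static unsigned shiftAmountFor(uint32_t Factor) {
  return isPowerOf2_32(Factor) ? Log2_32(Factor) : 0;
}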
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
OperandValueInfo getNoProps() const
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
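A hedged sketch of how a target hook fills in the UnrollingPreferences fields documented above; the concrete values are illustrative, not the defaults used by this backend:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static void tuneUnrolling(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Partial = true;         // permit partial unrolling
  UP.Runtime = true;         // permit runtime unrolling
  UP.UpperBound = true;      // may use the trip-count upper bound
  UP.PartialThreshold = 150; // cost budget for the partially unrolled body
  UP.DefaultUnrollRuntimeCount = 4;
}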
Type Conversion Cost Table.